Browse Source

Partial support for multi-root graph in online debugger

tags/v1.6.0
Parastoo Ashtari 4 years ago
parent
commit
7f682ba2f6
13 changed files with 195 additions and 66 deletions
  1. +49
    -6
      mindspore/ccsrc/debug/debug_services.cc
  2. +4
    -0
      mindspore/ccsrc/debug/debug_services.h
  3. +97
    -42
      mindspore/ccsrc/debug/debugger/debugger.cc
  4. +15
    -1
      mindspore/ccsrc/debug/debugger/debugger.h
  5. +12
    -7
      mindspore/ccsrc/debug/debugger/debugger_utils.cc
  6. +4
    -2
      mindspore/ccsrc/debug/debugger/debugger_utils.h
  7. +3
    -2
      mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
  8. +2
    -1
      mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h
  9. +2
    -1
      mindspore/ccsrc/runtime/device/device_address.h
  10. +3
    -2
      mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc
  11. +2
    -1
      mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h
  12. +1
    -1
      mindspore/ccsrc/runtime/framework/actor/debug_actor.cc
  13. +1
    -0
      mindspore/ccsrc/runtime/framework/graph_compiler.cc

+ 49
- 6
mindspore/ccsrc/debug/debug_services.cc View File

@@ -335,7 +335,41 @@ void DebugServices::SetTensorToNotInUse(const std::shared_ptr<TensorData> &tenso
}
#endif

#ifdef ONLINE_DBG_MODE
bool DebugServices::CompareCurrentRootGraph(uint32_t id) {
auto debugger = Debugger::GetInstance();
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
std::string device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
auto cur_root_graph_id = debugger->GetCurrentRootGraphId();
if ((device_target == kGPUDevice && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) ||
device_target == kAscendDevice) {
if (cur_root_graph_id != id) {
return false;
}
}
return true;
}

// Fetches the previous-iteration tensor data for `tensor_name`, writing its
// element count to `prev_num_elements` and returning its data pointer.
// Returns nullptr when no usable previous tensor exists — either because the
// previous root graph differs from the current one (prev-tensor watchpoints are
// not supported across root graphs) or because the loader has no entry.
const void *DebugServices::PreparePrevTensor(uint32_t *prev_num_elements, const std::string &tensor_name) {
  if (!CompareCurrentRootGraph(Debugger::GetInstance()->GetPrevRootGraphId())) {
    // not supporting watchpoints that need prev tensor for multi root graph networks.
    MS_LOG(DEBUG) << "Previous root graph is different from current root graph, setting prev_tensor to nullptr.";
    return nullptr;
  }
  auto prev_tensor_data = tensor_loader_->GetPrevTensor(tensor_name);
  if (prev_tensor_data == nullptr) {
    return nullptr;
  }
  *prev_num_elements = prev_tensor_data->GetNumElements();
  return prev_tensor_data->GetDataPtr();
}
#endif

void DebugServices::CheckHistoryErrorCode(int *error_code, bool history_not_found) {
// check history error_code only for offline debugger
if (history_not_found) {
*error_code = ITensorSummary::HISTORY_NOT_FOUND; // error code for history not found
}
@@ -401,13 +435,14 @@ void DebugServices::CheckWatchpointsForTensor(
bool history_not_found = 0;
previous_tensor_ptr = GetPrevTensor(tensor, previous_iter_tensor_needed, &prev_num_elements, &history_not_found);
#else
std::shared_ptr<TensorData> prev_tensor_data = tensor_loader_->GetPrevTensor(tensor_name);
if (prev_tensor_data) {
previous_tensor_ptr = prev_tensor_data->GetDataPtr();
prev_num_elements = prev_tensor_data->GetNumElements();
if (!CompareCurrentRootGraph(tensor->GetRootGraphId())) {
MS_LOG(DEBUG)
<< "Current root_graph_id is different from tensor's root_graph_id, skipping checkwatchpoints for tensor: "
<< tensor->GetName();
continue;
}
previous_tensor_ptr = PreparePrevTensor(&prev_num_elements, tensor_name);
#endif

std::unique_ptr<ITensorSummary> base_summary_ptr;
if (!(watchpoints_to_check.size() == 1 && watchpoints_to_check[0].condition.type == IS_OVERFLOW)) {
base_summary_ptr = GetSummaryPtr(tensor, previous_tensor_ptr, num_elements, prev_num_elements, tensor_dtype);
@@ -440,7 +475,6 @@ void DebugServices::CheckWatchpointsForTensor(
tensor->GetDeviceId(), tensor->GetRootGraphId(), parameter_list, error_code);
}
}

#ifdef OFFLINE_DBG_MODE
SetTensorToNotInUse(tensor, previous_tensor_ptr);
// in offline mode remove the need for the data
@@ -448,6 +482,7 @@ void DebugServices::CheckWatchpointsForTensor(
#endif
}
}

void DebugServices::CheckWatchpoints(
std::vector<std::string> *const name, std::vector<std::string> *const slot, std::vector<int> *const condition,
std::vector<unsigned int> *const watchpoint_id, std::vector<std::vector<parameter_t>> *const parameters,
@@ -1362,6 +1397,14 @@ void DebugServices::ReadNodesTensors(const std::vector<std::string> &name, std::
if (std::get<1>(result) == nullptr) {
continue;
}
#ifdef ONLINE_DBG_MODE
if (!CompareCurrentRootGraph(std::get<1>(result)->GetRootGraphId())) {
MS_LOG(INFO) << "tensor root_graph_id: " << std::get<1>(result)->GetRootGraphId()
<< " is different from cur_root_graph_id: " << Debugger::GetInstance()->GetCurrentRootGraphId()
<< ".";
MS_LOG(INFO) << "Not reading tensor: " << std::get<0>(result) << ".";
}
#endif
(void)ret_name->emplace_back(std::get<0>(result));
(void)data_ptr->emplace_back(reinterpret_cast<const char *>(std::get<1>(result)->GetDataPtr()));
(void)data_size->emplace_back(std::get<1>(result)->GetByteSize());


+ 4
- 0
mindspore/ccsrc/debug/debug_services.h View File

@@ -260,6 +260,8 @@ class DebugServices {
const std::vector<parameter_t> &parameter_list);
#endif

const void *PreparePrevTensor(uint32_t *prev_num_elements, const std::string &tensor_name);

void CheckHistoryErrorCode(int *error_code, bool history_not_found);

void CheckWatchpointsForTensor(partitioned_names *chunk_names, partitioned_names *chunk_slots,
@@ -411,6 +413,8 @@ class DebugServices {
bool IsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel = nullptr) const;

bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const;

bool CompareCurrentRootGraph(uint32_t id);
#endif

std::vector<std::shared_ptr<TensorData>> GetTensor() const;


+ 97
- 42
mindspore/ccsrc/debug/debugger/debugger.cc View File

@@ -77,6 +77,8 @@ Debugger::Debugger()
node_name_(""),
cur_name_(""),
training_done_(false),
send_metadata_done_(false),
received_new_graph_(false),
is_dataset_graph_(false),
partial_memory_(false),
initial_suspend_(true),
@@ -284,20 +286,35 @@ void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs
}
// Store graphs that are run in one step.
graph_ptr_step_vec_ = graphs;
prev_root_graph_id_ = cur_root_graph_id_;
// set first run graph as the root graph
cur_root_graph_id_ = graph_ptr_step_vec_[0]->graph_id();
MS_LOG(DEBUG) << "Current root graph id: " << cur_root_graph_id_ << " prev_root_graph_id_: " << prev_root_graph_id_
<< " for step: " << num_step_ << ".";
MS_LOG(DEBUG) << "Set root graph for all the subgraphs:";
for (size_t graph_index = 0; graph_index < graphs.size(); ++graph_index) {
const auto &graph = graphs[graph_index];
// set root graph id for GPU mindrt runtime.
MS_LOG(DEBUG) << "Set root graph for graph: " << graph->graph_id() << " to: " << cur_root_graph_id_ << ".";
graph->set_root_graph_id(cur_root_graph_id_);
if (debugger_) {
debugger_->PreExecute(graph);
}
}
}

void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
MS_EXCEPTION_IF_NULL(graph_ptr);
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
CheckDatasetSinkMode();
auto graph_id = graph_ptr->graph_id();
// Records `root_graph_id` as the current root graph and remembers the previous
// one. Only applies on Ascend; for GPU the root graphs are set in
// PreExecuteGraphDebugger instead.
void Debugger::SetCurrentAndPrevRootGraph(uint32_t root_graph_id) {
  if (device_target_ == kAscendDevice) {
    prev_root_graph_id_ = cur_root_graph_id_;
    cur_root_graph_id_ = root_graph_id;
    MS_LOG(DEBUG) << "Current root graph id: " << cur_root_graph_id_ << " prev_root_graph_id_: " << prev_root_graph_id_
                  << " for step: " << num_step_ << ".";
  }
}

void Debugger::StoreRunGraphIdList(uint32_t graph_id) {
// collect rungrap_ids to update step number in multigraph case
if (!rungraph_id_list_.size()) {
rungraph_id_list_.push_back(graph_id);
@@ -307,6 +324,17 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
rungraph_id_list_.push_back(graph_id);
}
}
}

void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
MS_EXCEPTION_IF_NULL(graph_ptr);
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
CheckDatasetSinkMode();
auto graph_id = graph_ptr->graph_id();
MS_LOG(DEBUG) << "PreExecute for graph: " << graph_id << " in step: " << num_step_ << ".";
StoreRunGraphIdList(graph_id);
SetCurrentAndPrevRootGraph(graph_ptr->root_graph_id());
// multiple graphs
if (graph_proto_list_.size() > 1) {
// there are more than one graphs are not dataset_graph
@@ -315,20 +343,22 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
}
} else if (graph_proto_list_.size() == 1) {
// single graph, and not the initial step
if (device_target_ == kGPUDevice && num_step_ != 0) {
if (device_target_ == kGPUDevice && !MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT) &&
num_step_ != 0) {
if (debugger_enabled_ && !(run_level_ == "node" && suspended_at_last_kernel_)) {
CommandLoop();
}
debug_services_->ResetLoadedTensors();
}
// In single graph case, reset graph_ptr_ to be nullptr for the initial step
if (num_step_ == 0) {
// In single graph case, reset graph_ptr_ to be nullptr when debugger receives a new graph
if (received_new_graph_) {
graph_ptr_ = nullptr;
CheckGraphPtr(graph_ptr);
}
} else if (debugger_enabled_ && graph_id == rungraph_id_list_.front() && device_target_ == kGPUDevice) {
} else if (debugger_enabled_ && graph_id == rungraph_id_list_.front() && device_target_ == kGPUDevice &&
!MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
// Multiple graph, and not the initial step,
// stop only when receive the first sub run graph for each step
// stop only when receive the first sub run graph for each step for old runtime
// if we have stopped for the last kernel before, no need to stop again
if (pipeline::GraphExecutorPy::GetDebugTerminate()) {
return;
@@ -359,6 +389,7 @@ void Debugger::SendMultiGraphsAndClear(const KernelGraphPtr &graph_ptr) {
SendMultiGraphsAndSuspend(graph_proto_list_);

graph_proto_list_.clear();
received_new_graph_ = false;
}
}

@@ -474,14 +505,19 @@ void Debugger::PostExecute() {
}
SendWatchpoints(CheckWatchpoints());

// no need to suspend at each graph for GPU, suspension happens in preExecute
if (device_target_ != kGPUDevice) {
// no need to suspend at each graph for GPU old runtime, suspension happens in preExecute
if (device_target_ == kAscendDevice) {
CommandLoop();
} else if (device_target_ == kGPUDevice && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
if (!(run_level_ == "node" && suspended_at_last_kernel_)) {
CommandLoop();
}
}
}
// Only keep parameters in the current map
// GPU ResetLoadedTensors happens in preExecute
if (device_target_ != kGPUDevice) {
// Only keep parameters in the current map
// GPU ResetLoadedTensors for old runtime happens in preExecute
if ((device_target_ == kGPUDevice && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) ||
device_target_ == kAscendDevice) {
debug_services_->ResetLoadedTensors();
}
}
@@ -534,6 +570,7 @@ void Debugger::LoadGraphs(const KernelGraphPtr &graph_ptr) {
MS_EXCEPTION_IF_NULL(graph_ptr);
if (graph_ptr_ != graph_ptr) {
MS_LOG(INFO) << "LoadGraphs Debugger got new graph: " << graph_ptr->graph_id();
received_new_graph_ = true;
// save new graph_ptr
graph_ptr_ = graph_ptr;
CheckDatasetGraph();
@@ -559,12 +596,16 @@ void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) {
graph_ptr_ = graph_ptr;
if (!is_dataset_graph_) {
// only try to enable debugger if it is not a dataset graph
EnableDebugger();
if (!debugger_enabled_) {
EnableDebugger();
}
if (debugger_enabled_) {
LoadParametersAndConst();
// get graph proto and send to Mindinsight
auto graph_proto = graph_proto_list_.front();
SendGraphAndSuspend(graph_proto);
graph_proto_list_.clear();
received_new_graph_ = false;
}
}
}
@@ -636,16 +677,17 @@ void Debugger::SendHeartbeat(int32_t period) {
}

void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) {
if (SendMetadata(true)) {
// send graph to Mindinsight server
MS_EXCEPTION_IF_NULL(grpc_client_);
EventReply reply = grpc_client_->SendGraph(graph_proto);
if (reply.status() != reply.OK) {
MS_LOG(ERROR) << "Error: SendGraph failed";
}
// enter command loop, wait and process commands
CommandLoop();
if (!CheckSendMetadata()) {
return;
}
// send graph to Mindinsight server
MS_EXCEPTION_IF_NULL(grpc_client_);
EventReply reply = grpc_client_->SendGraph(graph_proto);
if (reply.status() != reply.OK) {
MS_LOG(ERROR) << "Error: SendGraph failed";
}
// enter command loop, wait and process commands
CommandLoop();
}

bool Debugger::SendMetadata(bool version_check) {
@@ -695,7 +737,7 @@ bool Debugger::SendMetadata(bool version_check) {
}

void Debugger::SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_proto_list) {
if (!SendMetadata(true)) {
if (!CheckSendMetadata()) {
return;
}
MS_EXCEPTION_IF_NULL(grpc_client_);
@@ -732,10 +774,20 @@ void Debugger::SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_prot
CommandLoop();
}

// Sends metadata to MindInsight once per session. Returns true if metadata has
// already been sent or was sent successfully now; false if sending failed.
bool Debugger::CheckSendMetadata() {
  if (send_metadata_done_) {
    return true;
  }
  if (!SendMetadata(true)) {
    return false;
  }
  send_metadata_done_ = true;
  return true;
}

void Debugger::CommandLoop() {
// prepare metadata
MS_EXCEPTION_IF_NULL(graph_ptr_);
std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id());
std::string device_name = std::to_string(device_id_) + ":" + std::to_string(cur_root_graph_id_);
Metadata metadata;

metadata.set_device_name(device_name);
@@ -1051,8 +1103,8 @@ std::list<TensorBase> Debugger::LoadTensorsBase(const ProtoVector<TensorProto> &
debug_services_->SearchNodesTensors(name, &result_list);
for (auto result : result_list) {
auto tensor = std::get<1>(result);
if (!tensor) {
// tensor was not found, creating empty tensor base.
if (!tensor || cur_root_graph_id_ != tensor->GetRootGraphId()) {
// tensor was not found or tensor's graph was not executed in the current step, creating empty tensor base.
TensorBase tensor_base_item;
tensor_base_item.set_data_size(0);
tensor_base_item.set_data_type(0);
@@ -1080,8 +1132,8 @@ std::list<TensorSummary> Debugger::LoadTensorsStat(const ProtoVector<TensorProto
debug_services_->SearchNodesTensors(name, &result_list);
for (auto result : result_list) {
auto tensor = std::get<1>(result);
if (!tensor) {
// tensor was not found, creating empty tensor summary.
if (!tensor || cur_root_graph_id_ != tensor->GetRootGraphId()) {
// tensor was not found or tensor's graph was not executed in the current step, creating empty tensor summary.
DebugServices::TensorStat tensor_stat;
AddTensorStatInfo(tensor_stat, &tensor_summary_list);
continue;
@@ -1326,7 +1378,7 @@ bool Debugger::CheckIp(const std::string &host) const {

uint32_t Debugger::GetFirstRunGraphId() const { return rungraph_id_list_.front(); }

void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index) {
void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index, uint32_t root_graph_id) {
MS_EXCEPTION_IF_NULL(anf_node);
if (!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) {
return;
@@ -1362,7 +1414,7 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
} else {
keep_prev = false;
}
bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev);
bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, keep_prev, root_graph_id);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
<< ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
@@ -1374,35 +1426,36 @@ void Debugger::LoadParametersAndConst() {
MS_EXCEPTION_IF_NULL(graph_ptr_);
// load parameters
MS_LOG(INFO) << "Start to load Parameters for graph " << graph_ptr_->graph_id() << ".";
auto root_graph_id = graph_ptr_->root_graph_id();
const auto &parameters = graph_ptr_->inputs();
for (auto &item : parameters) {
LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX);
LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX, root_graph_id);
}
// load value nodes
// get all constant values from the graph
MS_LOG(INFO) << "Start to load value nodes for graph " << graph_ptr_->graph_id() << ".";
const auto value_nodes = graph_ptr_->graph_value_nodes();
for (auto &item : value_nodes) {
LoadSingleAnfnode(item, VALUE_NODE_OUTPUT_INDEX);
LoadSingleAnfnode(item, VALUE_NODE_OUTPUT_INDEX, root_graph_id);
}
}

void Debugger::LoadParametersAndConst(const KernelGraphPtr &graph) {
if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(graph_ptr_);
// load parameters
MS_LOG(INFO) << "Start to load Parameters for graph " << graph->graph_id() << ".";
const auto &parameters = graph_ptr_->inputs();
auto root_graph_id = graph->root_graph_id();
const auto &parameters = graph->inputs();
for (auto &item : parameters) {
LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX);
LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX, root_graph_id);
}
// load value nodes
// get all constant values from the graph
MS_LOG(INFO) << "Start to load value nodes for graph " << graph->graph_id() << ".";
const auto value_nodes = graph_ptr_->graph_value_nodes();
const auto value_nodes = graph->graph_value_nodes();
for (auto &item : value_nodes) {
LoadSingleAnfnode(item, VALUE_NODE_OUTPUT_INDEX);
LoadSingleAnfnode(item, VALUE_NODE_OUTPUT_INDEX, root_graph_id);
}
}

@@ -1410,6 +1463,7 @@ void Debugger::LoadGraphOutputs() {
if (!(debugger_enabled() && device_target_ == kAscendDevice)) return;
MS_EXCEPTION_IF_NULL(graph_ptr_);
const auto &apply_kernels = graph_ptr_->execution_order();
auto root_graph_id = graph_ptr_->root_graph_id();
// for kernels, execution order starts from 1
int exec_order = 1;
for (const auto &node : apply_kernels) {
@@ -1435,7 +1489,7 @@ void Debugger::LoadGraphOutputs() {
auto format = kOpFormat_DEFAULT;
string tensor_name = kernel_name + ':' + std::to_string(j);
ShapeVector int_shapes = trans::GetRuntimePaddingShape(node, j);
auto ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false);
auto ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
<< ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
@@ -1463,6 +1517,7 @@ void Debugger::UpdateStepNumGPU() {
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
++num_step_;
MS_LOG(DEBUG) << "Update step for GPU, current step: " << num_step_;
}
}



+ 15
- 1
mindspore/ccsrc/debug/debugger/debugger.h View File

@@ -80,6 +80,10 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// do nothing if graph is set already
void PreExecute(const KernelGraphPtr &graph_ptr);

void SetCurrentAndPrevRootGraph(uint32_t root_graph_id);

void StoreRunGraphIdList(uint32_t graph_id);

// analyze tensors and wait for command
// don't need a graph_ptr because it is saved during pre_execute
void PostExecute();
@@ -131,6 +135,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// version_check should be true if you want the function to do backend compatibility check with Mindinsight
bool SendMetadata(bool version_check);

bool CheckSendMetadata();

void LoadParametersAndConst();

void LoadParametersAndConst(const KernelGraphPtr &graph);
@@ -149,6 +155,10 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

uint32_t GetFirstRunGraphId() const;

uint32_t GetCurrentRootGraphId() const { return cur_root_graph_id_; }

uint32_t GetPrevRootGraphId() const { return prev_root_graph_id_; }

void SetGraphPtr(const KernelGraphPtr &graph_ptr) { graph_ptr_ = graph_ptr; }

const KernelGraphPtr GetGraphPtr() const { return graph_ptr_; }
@@ -246,7 +256,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// Check if the IP is valid
bool CheckIp(const std::string &host) const;

void LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index);
void LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index, uint32_t root_graph_id);

// class members

@@ -263,9 +273,13 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
std::string node_name_;
std::string cur_name_;
bool training_done_;
bool send_metadata_done_;
bool received_new_graph_;
bool is_dataset_graph_;
bool partial_memory_;
std::mutex access_lock_;
uint32_t cur_root_graph_id_ = UINT32_MAX;
uint32_t prev_root_graph_id_ = UINT32_MAX;
// flag to keep track of the very first suspension of debugger
bool initial_suspend_;
bool enable_heartbeat_;


+ 12
- 7
mindspore/ccsrc/debug/debugger/debugger_utils.cc View File

@@ -52,7 +52,8 @@ std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &
return real_outputs;
}

void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_,
uint32_t root_graph_id) {
// get inputs
auto kernel_inputs = launch_info_->inputs_;
auto input_size = AnfAlgo::GetInputTensorNum(cnode);
@@ -70,7 +71,8 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uin
auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
string input_tensor_name = input_kernel_name + ':' + "0";
ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order_, format, int_shapes, type, 0, true);
auto ret =
gpu_addr->LoadMemToHost(input_tensor_name, exec_order_, format, int_shapes, type, 0, true, root_graph_id);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
<< ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
@@ -79,7 +81,8 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uin
}
}

void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_,
uint32_t root_graph_id) {
// get outputs
auto kernel_outputs = launch_info_->outputs_;
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
@@ -99,7 +102,7 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, ui
auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
string tensor_name = kernel_name + ':' + std::to_string(j);
ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order_, format, int_shapes, type, j, false);
auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order_, format, int_shapes, type, j, false, root_graph_id);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
<< ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
@@ -136,15 +139,17 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_
}
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool dump_enabled = debugger->DumpDataEnabledIteration();
auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
MS_EXCEPTION_IF_NULL(kernel_graph);
auto root_graph_id = kernel_graph->root_graph_id();
if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) {
LoadInputs(cnode, launch_info_, exec_order_);
LoadInputs(cnode, launch_info_, exec_order_, root_graph_id);
}
if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) {
LoadOutputs(cnode, launch_info_, exec_order_);
LoadOutputs(cnode, launch_info_, exec_order_, root_graph_id);
}
// Dump kernel
if (dump_enabled) {
auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
MS_EXCEPTION_IF_NULL(kernel_graph);
auto graph_id = kernel_graph->graph_id();
debugger->DumpSingleNode(cnode, graph_id);


+ 4
- 2
mindspore/ccsrc/debug/debugger/debugger_utils.h View File

@@ -26,9 +26,11 @@ namespace mindspore {

std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size);

void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_);
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_,
uint32_t root_graph_id);

void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_);
void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_,
uint32_t root_graph_id);

bool CheckReadData(const CNodePtr &cnode);



+ 3
- 2
mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc View File

@@ -589,8 +589,8 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::

#ifdef ENABLE_DEBUGGER
bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &,
const ShapeVector &host_shape, TypeId host_type, size_t slot,
bool keep_prev) const {
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
uint32_t root_graph_id) const {
bool ret = false;
auto debugger = Debugger::GetInstance();
MS_EXCEPTION_IF_NULL(debugger);
@@ -619,6 +619,7 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec
tensor_data->SetByteSize(LongToSize(out_tensor->data().nbytes()));
tensor_data->SetType((unsigned int)host_type);
tensor_data->SetShape(out_tensor->shape());
tensor_data->SetRootGraphId(root_graph_id);
ret = debugger->LoadNewTensor(tensor_data, keep_prev);
return ret;
}


+ 2
- 1
mindspore/ccsrc/runtime/device/ascend/ascend_device_address.h View File

@@ -62,7 +62,8 @@ class AscendDeviceAddress : public DeviceAddress {
#endif
#ifdef ENABLE_DEBUGGER
bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override;
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
uint32_t root_graph_id = 0) const override;
#endif

private:


+ 2
- 1
mindspore/ccsrc/runtime/device/device_address.h View File

@@ -118,7 +118,8 @@ class DeviceAddress : public mindspore::DeviceSync {
}
#ifdef ENABLE_DEBUGGER
virtual bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const {
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
uint32_t root_graph_id = 0) const {
return true;
}
#endif


+ 3
- 2
mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc View File

@@ -141,8 +141,8 @@ GPUDeviceAddress::~GPUDeviceAddress() { ClearDeviceMemory(); }

#ifdef ENABLE_DEBUGGER
bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, size_t slot,
bool keep_prev) const {
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
uint32_t root_graph_id) const {
bool ret = false;
if (size_ == 0) {
return true;
@@ -171,6 +171,7 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
tensor_data->SetByteSize(out_tensor->data().nbytes());
tensor_data->SetType((unsigned int)host_type);
tensor_data->SetShape(out_tensor->shape());
tensor_data->SetRootGraphId(root_graph_id);
ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev);
MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
return ret;


+ 2
- 1
mindspore/ccsrc/runtime/device/gpu/gpu_device_address.h View File

@@ -54,7 +54,8 @@ class GPUDeviceAddress : public DeviceAddress {
#ifdef ENABLE_DEBUGGER
bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override;
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
uint32_t root_graph_id = 0) const override;
#endif
private:


+ 1
- 1
mindspore/ccsrc/runtime/framework/actor/debug_actor.cc View File

@@ -114,10 +114,10 @@ void DebugActor::DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const
#ifdef ENABLE_DEBUGGER
auto debugger = Debugger::GetInstance();
if (debugger != nullptr) {
debugger->Debugger::UpdateStepNumGPU();
// Reset exec_order for the next step
exec_order_ = 0;
debugger->Debugger::PostExecuteGraphDebugger();
debugger->Debugger::UpdateStepNumGPU();
}
#else
#ifndef ENABLE_SECURITY


+ 1
- 0
mindspore/ccsrc/runtime/framework/graph_compiler.cc View File

@@ -324,6 +324,7 @@ GraphId GraphCompiler::CompileGraph(const AnfNodePtrList &nodes, const AnfNodePt
auto backend_node = graph->output();
MS_EXCEPTION_IF_NULL(backend_node);
graph->CacheGraphOutputToFrontNodeWithIndex({backend_node}, outputs);
graph->set_root_graph_id(graph_id);

return graph_id;
}


Loading…
Cancel
Save