From b8fc889368564e40cb8f2913d69955c7bf4fb6e2 Mon Sep 17 00:00:00 2001
From: Adel Shafiei
Date: Fri, 23 Oct 2020 21:24:36 -0400
Subject: [PATCH] fixed the problem with missing input dumps when using GPU

---
 mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc   | 10 +++++-----
 mindspore/ccsrc/debug/debugger/debugger.cc         |  2 +-
 .../ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc | 13 +++++++++----
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc b/mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc
index b86d8642e4..fc3682b670 100644
--- a/mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc
+++ b/mindspore/ccsrc/debug/data_dump/e2e_dump_util.cc
@@ -121,8 +121,8 @@ void E2eDumpUtil::DumpOutput(const session::KernelGraph *graph, const std::strin
       auto type = AnfAlgo::GetOutputInferDataType(node, j);
       std::string file_path = dump_path + '/' + kernel_name + '_' + "output_" + std::to_string(j);
       if (IsDeviceTargetGPU()) {
-        DumpGPUMemToFile(file_path, node->fullname_with_scope(), NOT_NULL(addr), trans_flag, int_shapes, type, j,
-                         debugger);
+        DumpGPUMemToFile(file_path, node->fullname_with_scope() + "_output", NOT_NULL(addr), trans_flag, int_shapes,
+                         type, j, debugger);
       } else {
         DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
       }
@@ -160,8 +160,8 @@ void E2eDumpUtil::DumpInput(const session::KernelGraph *graph, const std::string
       auto type = AnfAlgo::GetOutputInferDataType(input, index);
       std::string file_path = dump_path + '/' + kernel_name + '_' + "input_" + std::to_string(j);
       if (IsDeviceTargetGPU()) {
-        DumpGPUMemToFile(file_path, node->fullname_with_scope(), NOT_NULL(addr), trans_flag, int_shapes, type, j,
-                         debugger);
+        DumpGPUMemToFile(file_path, node->fullname_with_scope() + "_input", NOT_NULL(addr), trans_flag, int_shapes,
+                         type, j, debugger);
       } else {
         DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
       }
@@ -249,7 +249,7 @@ void E2eDumpUtil::DumpSingleAnfnode(const AnfNodePtr &anf_node, const size_t out
 
   std::string file_path = dump_path + '/' + dump_name + '_' + "output_0";
   if (IsDeviceTargetGPU()) {
-    DumpGPUMemToFile(file_path, node_name, NOT_NULL(addr), trans_flag, int_shapes, type, 0, debugger);
+    DumpGPUMemToFile(file_path, node_name + "_output", NOT_NULL(addr), trans_flag, int_shapes, type, 0, debugger);
   } else {
     DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
   }
diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc
index f867b11ccb..b2425ca1dd 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.cc
+++ b/mindspore/ccsrc/debug/debugger/debugger.cc
@@ -890,7 +890,7 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
   MS_EXCEPTION_IF_NULL(addr);
   auto type = AnfAlgo::GetOutputInferDataType(anf_node, output_index);
   auto format = kOpFormat_DEFAULT;
-  string tensor_name = node_name + ':' + "0";
+  string tensor_name = node_name + "_output:" + "0";
   ShapeVector int_shapes;
   auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
   (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
index 784d7babd0..c854362a35 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
@@ -123,18 +123,23 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
 
   // get inputs
   auto input_size = AnfAlgo::GetInputTensorNum(kernel);
   for (size_t j = 0; j < input_size; ++j) {
-    auto input_kernel = kernel->input(j + 1);
+    auto kernel_with_index = AnfAlgo::GetPrevNodeOutput(kernel, j);
+    auto input_kernel = kernel_with_index.first;
+    auto index = kernel_with_index.second;
+    std::string input_kernel_name = input_kernel->fullname_with_scope();
     auto addr = kernel_inputs[j];
     auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
     auto format = kOpFormat_DEFAULT;
     auto gpu_addr = std::make_unique<GPUDeviceAddress>(addr->addr, addr->size, format, type);
-    string input_tensor_name = input_kernel_name + ':' + "0";
+    string input_tensor_name = input_kernel_name + "_output:" + std::to_string(index);
     ShapeVector int_shapes;
     auto shape = AnfAlgo::GetOutputDeviceShape(input_kernel, PARAMETER_OUTPUT_INDEX);
     (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                          [](size_t inner_item) { return SizeToInt(inner_item); });
-    auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true);
+    auto cur_node_name = kernel->fullname_with_scope();
+    auto ret = gpu_addr->LoadMemToHost(cur_node_name + "_input:" + std::to_string(j), exec_order, format, int_shapes,
+                                       type, 0, true);
     if (!ret) {
       MS_LOG(ERROR) << "LoadMemToHost:"
                     << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
@@ -156,7 +161,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
     auto type = AnfAlgo::GetOutputInferDataType(kernel, j);
     auto format = kOpFormat_DEFAULT;
     auto gpu_addr = std::make_unique<GPUDeviceAddress>(addr->addr, addr->size, format, type);
-    string tensor_name = kernel_name + ':' + std::to_string(j);
+    string tensor_name = kernel_name + "_output:" + "0";
     ShapeVector int_shapes;
     auto shape = AnfAlgo::GetOutputDeviceShape(kernel, j);
     (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),