From 6bb2182134cd5ea4aaf128fb1318b0f1d6acd88b Mon Sep 17 00:00:00 2001
From: lichen_101010
Date: Mon, 22 Jun 2020 16:52:42 -0400
Subject: [PATCH] Add partial memory reuse support to debugger
move pre-execution of debugger from rungraph to build/compile graph
support partial mem reuse for a scope of nodes
set default mem reuse to be true for debugger
remove some redundant lines
remove redundant code and fix a bug in supporting partial no-mem-reuse for a scope of nodes
resolve CI errors
Solve CI errors
solve cpplint errors
solve CI build error
manually fix the CI compile UT error
Optimize code for mem reuse support
Debug optimization of debugger memory reuse
debug code for debugger memory reuse part2
address clang-format errors
Switch memory reuse on and off based on environment variable
Fix typo
Fix typo
Load watchpoint value only
fix bugs
Addressed comments from lupengcheng
fix typo
Fix typo
fix CI errors
refactor some code
fix typo
addressed comments from Canadian teammates
remove locking from TensorLoader
fix CI errors
add lock to tensor_loader
fix rebase-to-master conflict
fix rebase conflicts
fix rebase conflicts part 2
fix rebase conflicts part 3
---
.../mem_reuse/mem_reuse_allocator.cc | 14 +++++-
.../ccsrc/backend/session/ascend_session.cc | 19 +++++---
mindspore/ccsrc/debug/debug_services.cc | 42 +++++++++++++-----
mindspore/ccsrc/debug/debug_services.h | 36 +++++++++-------
mindspore/ccsrc/debug/debugger/debugger.cc | 43 +++++++++++++++----
mindspore/ccsrc/debug/debugger/debugger.h | 3 ++
mindspore/ccsrc/debug/tensor_data.h | 20 ++-------
mindspore/ccsrc/debug/tensor_load.h | 9 ++--
.../device/ascend/ascend_device_address.cc | 23 ++++------
.../device/ascend/ascend_kernel_runtime.cc | 10 +++++
10 files changed, 142 insertions(+), 77 deletions(-)
diff --git a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc
index 787d334a1a..d1a50a0dfe 100644
--- a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc
+++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc
@@ -13,13 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
#include "backend/optimizer/mem_reuse/mem_reuse_allocator.h"
#include "backend/optimizer/mem_reuse/mem_reuse.h"
#include "backend/optimizer/mem_reuse/mem_reuse_checker.h"
#ifdef ENABLE_D
#include "runtime/device/ascend/ascend_stream_assign.h"
#endif
+#ifdef ENABLE_DEBUGGER
+#include "debug/debugger/debugger.h"
+#include "debug/debug_services.h"
+#endif
namespace mindspore {
namespace memreuse {
@@ -75,6 +78,15 @@ bool BestFitMemReuse::IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr
MS_EXCEPTION_IF_NULL(mem_buf);
auto kernel_prev = mem_buf->used_kernel_;
MS_EXCEPTION_IF_NULL(kernel_prev);
+#ifdef ENABLE_DEBUGGER
+ auto debugger_ = mindspore::Debugger::GetInstance();
+ DebugServices *debug_services = debugger_->debug_services();
+ auto watchpoint_table = debug_services->GetWatchpointTable();
+ std::string current_kernel_name = kernel_curr->scope_full_name();
+ if (debug_services->IsWatchPoint(current_kernel_name, watchpoint_table)) {
+ return false;
+ }
+#endif
auto curr_stream_id = kernel_curr->stream_id();
auto prev_stream_id = kernel_prev->stream_id();
if (curr_stream_id == prev_stream_id) {
diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc
index 9995518c00..3987b9f183 100644
--- a/mindspore/ccsrc/backend/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/session/ascend_session.cc
@@ -331,6 +331,11 @@ GraphId AscendSession::CompileGraph(NotNull func_graph) {
device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get()));
// build kernel
BuildKernel(root_graph);
+#ifdef ENABLE_DEBUGGER
+ if (debugger_) {
+ debugger_->PreExecute(root_graph);
+ }
+#endif
// alloc mem
MemoryAlloc(root_graph.get());
// task generate
@@ -407,6 +412,11 @@ void AscendSession::BuildGraph(GraphId graph_id) {
BuildKernel(graph);
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
+#ifdef ENABLE_DEBUGGER
+ if (debugger_) {
+ debugger_->PreExecute(graph);
+ }
+#endif
if (ms_context->precompile_only()) {
MS_LOG(INFO) << "Precompile only, stop in build kernel step";
} else {
@@ -475,12 +485,6 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vectorPreExecute(kernel_graph);
- }
-#endif
{
py::gil_scoped_release release;
// run task on device
@@ -791,7 +795,8 @@ void AscendSession::LoadTensor(const std::shared_ptr &kernel_graph)
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
DebugServices *debug_services = debugger_->debug_services();
- TensorLoader *tensor_loader = debug_services->get_tensor_loader();
+ TensorLoader *tensor_loader = debug_services->tensor_loader();
+ // TensorData will be freed up here
tensor_loader->EmptyTensor();
uint32_t iter_num = tensor_loader->GetIterNum();
tensor_loader->set_iter_num(++iter_num);
diff --git a/mindspore/ccsrc/debug/debug_services.cc b/mindspore/ccsrc/debug/debug_services.cc
index cb883eef51..cc6c5c53ad 100644
--- a/mindspore/ccsrc/debug/debug_services.cc
+++ b/mindspore/ccsrc/debug/debug_services.cc
@@ -37,8 +37,8 @@ DebugServices &DebugServices::operator=(const DebugServices &other) {
DebugServices::~DebugServices() { delete tensor_loader_; }
-void DebugServices::add_watchpoint(unsigned int id, unsigned int watch_condition,
- const std::vector> &check_node_list) {
+void DebugServices::AddWatchpoint(unsigned int id, unsigned int watch_condition,
+ const std::vector> &check_node_list) {
std::lock_guard lg(lock_);
watchpoint_t watchpoint_item;
@@ -57,14 +57,14 @@ void DebugServices::add_watchpoint(unsigned int id, unsigned int watch_condition
watchpoint_table[id] = watchpoint_item;
}
-void DebugServices::remove_watchpoint(unsigned int id) {
+void DebugServices::RemoveWatchpoint(unsigned int id) {
std::lock_guard lg(lock_);
watchpoint_table.erase(id);
}
-void DebugServices::check_watchpoints(std::vector *name, std::vector *slot,
- std::vector *data_ptr, std::vector *data_size,
- std::vector *condition, std::vector *wacthpoint_id) {
+void DebugServices::CheckWatchpoints(std::vector *name, std::vector *slot,
+ std::vector *data_ptr, std::vector *data_size,
+ std::vector *condition, std::vector *wacthpoint_id) {
std::lock_guard lg(lock_);
std::vector> tensor_list = tensor_loader_->GetTensor();
@@ -171,9 +171,9 @@ void DebugServices::check_watchpoints(std::vector *name, std::vecto
}
}
-void DebugServices::read_nodes_tensors(std::vector name, std::vector *ret_name,
- std::vector *data_ptr, std::vector *data_size,
- std::vector *dtype, std::vector> *shape) {
+void DebugServices::ReadNodesTensors(std::vector name, std::vector *ret_name,
+ std::vector *data_ptr, std::vector *data_size,
+ std::vector *dtype, std::vector> *shape) {
std::vector>> result_list;
tensor_loader_->SearchTensors(name, &result_list);
@@ -189,6 +189,28 @@ void DebugServices::read_nodes_tensors(std::vector name, std::vecto
}
}
-TensorLoader *DebugServices::get_tensor_loader() const { return tensor_loader_; }
+bool DebugServices::IsWatchPoint(std::string kernel_name,
+ std::unordered_map watchpoint_table) {
+ bool ret = false;
+ for (auto w_table_item : watchpoint_table) {
+ auto check_node_list = std::get<1>(w_table_item).check_node_list;
+ for (auto check_node : check_node_list) {
+ std::string w_name = std::get<0>(check_node);
+ bool w_type = std::get<1>(check_node);
+ if ((w_type == true &&
+ ((kernel_name.find(w_name) != string::npos && kernel_name.rfind(w_name, 0) == 0) || w_name == "*")) ||
+ (w_type == false && kernel_name == w_name)) {
+ ret = true;
+ return ret;
+ }
+ }
+ }
+ return ret;
+}
+
+TensorLoader *DebugServices::tensor_loader() const { return tensor_loader_; }
+std::unordered_map DebugServices::GetWatchpointTable() {
+ return watchpoint_table;
+}
} // namespace mindspore
diff --git a/mindspore/ccsrc/debug/debug_services.h b/mindspore/ccsrc/debug/debug_services.h
index b2fd41cd68..41400af1d5 100644
--- a/mindspore/ccsrc/debug/debug_services.h
+++ b/mindspore/ccsrc/debug/debug_services.h
@@ -37,22 +37,6 @@ class DebugServices {
~DebugServices();
- void add_watchpoint(unsigned int id, unsigned int watch_condition,
- const std::vector> &check_node_list);
-
- void remove_watchpoint(unsigned int id);
-
- void check_watchpoints(std::vector *name, std::vector *slot, std::vector *data_ptr,
- std::vector *data_size, std::vector *condition,
- std::vector *wacthpoint_id);
-
- void read_nodes_tensors(std::vector name, std::vector *ret_name,
- std::vector *data_ptr, std::vector *data_size,
- std::vector *dtype, std::vector> *shape);
-
- TensorLoader *get_tensor_loader() const;
-
- private:
typedef struct condition_no_param {
bool enabled = false;
} condition_no_param_t;
@@ -84,6 +68,26 @@ class DebugServices {
std::vector> check_node_list;
} watchpoint_t;
+ void AddWatchpoint(unsigned int id, unsigned int watch_condition,
+ const std::vector> &check_node_list);
+
+ void RemoveWatchpoint(unsigned int id);
+
+ void CheckWatchpoints(std::vector *name, std::vector *slot, std::vector *data_ptr,
+ std::vector *data_size, std::vector *condition,
+ std::vector *wacthpoint_id);
+
+ void ReadNodesTensors(std::vector name, std::vector *ret_name,
+ std::vector *data_ptr, std::vector *data_size,
+ std::vector *dtype, std::vector> *shape);
+
+ bool IsWatchPoint(std::string kernel_name, std::unordered_map watchpoint_table);
+
+ TensorLoader *tensor_loader() const;
+
+ std::unordered_map GetWatchpointTable();
+
+ private:
std::mutex lock_;
std::unordered_map watchpoint_table;
diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc
index 369f33d79c..dd89e17e2d 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.cc
+++ b/mindspore/ccsrc/debug/debugger/debugger.cc
@@ -43,7 +43,8 @@ Debugger::Debugger()
device_id_(0),
num_step_(0),
debugger_enabled_(false),
- is_dataset_graph_(false) {}
+ is_dataset_graph_(false),
+ partial_memory_(false) {}
void Debugger::Init(const uint32_t device_id) {
// access lock for public method
@@ -57,6 +58,7 @@ void Debugger::EnableDebugger() {
// reset some of the class members
num_step_ = 0;
debugger_enabled_ = false;
+ partial_memory_ = false;
grpc_client_ = nullptr;
debug_services_ = nullptr;
@@ -72,7 +74,8 @@ void Debugger::EnableDebugger() {
MS_LOG(WARNING) << "Not enabling debugger. Set environment variable ENABLE_MS_DEBUGGER=1 to enable debugger.";
return;
}
- // configure host
+
+ // configure grpc host
const char *env_host_str = std::getenv("MS_DEBUGGER_HOST");
std::string host;
if (env_host_str != nullptr) {
@@ -82,7 +85,7 @@ void Debugger::EnableDebugger() {
MS_LOG(WARNING) << "Environment variable MS_DEBUGGER_HOST doesn't exist. Using default debugger host: localhost";
host = "localhost";
}
- // configure port
+ // configure grpc port
const char *env_port_str = std::getenv("MS_DEBUGGER_PORT");
std::string port;
if (env_port_str != nullptr) {
@@ -93,6 +96,27 @@ void Debugger::EnableDebugger() {
port = "50051";
}
+ // configure partial memory reuse
+ const char *env_partial_mem_str = std::getenv("MS_DEBUGGER_PARTIAL_MEM");
+ if (env_partial_mem_str != nullptr) {
+ MS_LOG(INFO) << "Getenv MS_DEBUGGER_PARTIAL_MEM: " << env_partial_mem_str;
+ if (std::strcmp(env_partial_mem_str, "1") == 0) {
+ partial_memory_ = true;
+ }
+ }
+ // switch memory reuse on or off
+ auto context_ptr = MsContext::GetInstance();
+ MS_EXCEPTION_IF_NULL(context_ptr);
+ context_ptr->set_enable_mem_reuse(partial_memory_);
+ // print some message about memory reuse to user
+ if (partial_memory_) {
+ MS_LOG(WARNING) << "Partial Memory Reuse is enabled. Note: 1. Please only set watchpoints before running the first "
+ "step. 2. Tensor values are only available for nodes that are watched by any watchpoint.";
+ } else {
+ MS_LOG(WARNING) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory "
+ "usage for large models.";
+ }
+
// initialize grpc client
grpc_client_ = std::make_unique(host, port);
debug_services_ = std::make_unique();
@@ -106,6 +130,7 @@ void Debugger::Reset() {
num_step_ = 0;
debugger_enabled_ = false;
is_dataset_graph_ = false;
+ partial_memory_ = false;
graph_ptr_ = nullptr;
grpc_client_ = nullptr;
debug_services_ = nullptr;
@@ -317,11 +342,10 @@ void Debugger::SetWatchpoint(const ProtoVector &nodes, const WatchCon
[](WatchNode node) -> std::tuple {
return make_tuple(node.node_name(), node.node_type() == "scope");
});
-
- debug_services_->add_watchpoint(id, condition.condition(), check_node_list);
+ debug_services_->AddWatchpoint(id, condition.condition(), check_node_list);
}
-void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->remove_watchpoint(id); }
+void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->RemoveWatchpoint(id); }
std::list Debugger::LoadTensors(const ProtoVector &tensors) const {
std::vector name;
@@ -335,7 +359,7 @@ std::list Debugger::LoadTensors(const ProtoVector &ten
// ret_name will contain tensor names that are found in TensorLoader
// items in ret_name will be in the same order with tensors if found
- debug_services_->read_nodes_tensors(name, &ret_name, &data_ptr, &data_size, &dtype, &shape);
+ debug_services_->ReadNodesTensors(name, &ret_name, &data_ptr, &data_size, &dtype, &shape);
std::list tensor_list;
unsigned int result_index = 0;
@@ -384,8 +408,7 @@ std::list Debugger::CheckWatchpoints() const {
std::vector condition;
std::vector watchpoint_id;
- debug_services_->check_watchpoints(&name, &slot, &data_ptr, &data_size, &condition, &watchpoint_id);
-
+ debug_services_->CheckWatchpoints(&name, &slot, &data_ptr, &data_size, &condition, &watchpoint_id);
std::list hits;
for (unsigned int i = 0; i < name.size(); i++) {
WatchpointHit hit;
@@ -494,4 +517,6 @@ std::string GetTensorFullName(const TensorProto &tensor) {
return node_name + ":" + tensor.slot() + (tensor.iter() == "" ? "" : ":" + tensor.iter());
}
+bool Debugger::partial_memory() { return partial_memory_; }
+
} // namespace mindspore
diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h
index da1f325291..5a3965d7cc 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.h
+++ b/mindspore/ccsrc/debug/debugger/debugger.h
@@ -76,6 +76,8 @@ class Debugger : public std::enable_shared_from_this {
bool debugger_enabled() const;
+ bool partial_memory();
+
private:
// private constructor for singleton
Debugger();
@@ -129,6 +131,7 @@ class Debugger : public std::enable_shared_from_this {
int32_t num_step_;
bool debugger_enabled_;
bool is_dataset_graph_;
+ bool partial_memory_;
std::mutex access_lock_;
// singleton
diff --git a/mindspore/ccsrc/debug/tensor_data.h b/mindspore/ccsrc/debug/tensor_data.h
index 9704d69089..00af203208 100644
--- a/mindspore/ccsrc/debug/tensor_data.h
+++ b/mindspore/ccsrc/debug/tensor_data.h
@@ -51,25 +51,13 @@ class TensorData {
int GetExecutionOrder() { return this->execution_order; }
- int SetExecutionOrder(int execution_order) {
- this->execution_order = execution_order;
- return true;
- }
+ void SetExecutionOrder(int execution_order) { this->execution_order = execution_order; }
- int SetName(const std::string &name) {
- this->name = name;
- return true;
- }
+ void SetName(const std::string &name) { this->name = name; }
- bool SetTensor(mindspore::tensor::TensorPtr out_tensor) {
- this->tensor_ptr = out_tensor;
- return true;
- }
+ void SetTensor(mindspore::tensor::TensorPtr out_tensor) { this->tensor_ptr = out_tensor; }
- bool SetSlot(size_t slot) {
- this->slot = slot;
- return true;
- }
+ void SetSlot(size_t slot) { this->slot = slot; }
};
} // namespace mindspore
#endif // MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_
diff --git a/mindspore/ccsrc/debug/tensor_load.h b/mindspore/ccsrc/debug/tensor_load.h
index e3ae5c94eb..ae0e89aae2 100644
--- a/mindspore/ccsrc/debug/tensor_load.h
+++ b/mindspore/ccsrc/debug/tensor_load.h
@@ -19,6 +19,7 @@
#include
#include
#include