From 809c060ae19a3584fef09d647875917c0073f6d4 Mon Sep 17 00:00:00 2001
From: Adel Shafiei
Date: Fri, 11 Dec 2020 13:07:44 -0500
Subject: [PATCH] fixed recheck issue for operator overflow

---
 mindspore/ccsrc/debug/debugger/debugger.cc | 10 +++++++++-
 mindspore/ccsrc/debug/debugger/debugger.h  |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc
index 6bc56a31bb..9c2b99b008 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.cc
+++ b/mindspore/ccsrc/debug/debugger/debugger.cc
@@ -1045,7 +1045,15 @@ std::vector<std::string> Debugger::CheckOpOverflow() {
     }
   }
 
-  return op_names;
+  auto iter_op_names = overflow_ops_.find(num_step_);
+  if (iter_op_names == overflow_ops_.end()) {
+    overflow_ops_.insert(std::pair<uint32_t, std::vector<std::string>>(num_step_, op_names));
+
+    return op_names;
+  }
+  iter_op_names->second.insert(std::end(iter_op_names->second), std::begin(op_names), std::end(op_names));
+
+  return iter_op_names->second;
 }
 
 void Debugger::SetTrainingDone(bool training_done) { training_done_ = training_done; }
diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h
index 3c6e48c541..19feb29f18 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.h
+++ b/mindspore/ccsrc/debug/debugger/debugger.h
@@ -233,6 +233,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
   bool partial_memory_;
   std::mutex access_lock_;
   std::map<std::pair<uint32_t, uint32_t>, std::string> stream_task_to_opname_;
+  std::map<uint32_t, std::vector<std::string>> overflow_ops_;
   double last_overflow_bin_;
   std::map<uint32_t, std::string> overflow_bin_path_;
   // flag to keep track of the very first suspension of debugger