From 7687a55c0a6e25404bfbd820311ba7715b77d9c3 Mon Sep 17 00:00:00 2001 From: Harshvardhan Gupta Date: Mon, 30 Nov 2020 10:21:03 -0500 Subject: [PATCH] return error code if prev tensor not found and add validation checks --- mindspore/ccsrc/debug/debug_services.cc | 4 +- mindspore/ccsrc/debug/debug_services.h | 2 + .../ccsrc/debug/debugger/tensor_summary.cc | 47 ++++++++++++------- 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/mindspore/ccsrc/debug/debug_services.cc b/mindspore/ccsrc/debug/debug_services.cc index 0fb6f95daa..22630d3f79 100644 --- a/mindspore/ccsrc/debug/debug_services.cc +++ b/mindspore/ccsrc/debug/debug_services.cc @@ -157,7 +157,7 @@ void DebugServices::CheckWatchpoints(std::vector *name, std::vector } default: MS_LOG(INFO) << "Unsupported tensor type"; - break; + continue; } base_summary_ptr->SummarizeTensor(watchpoints_to_check); } @@ -168,7 +168,7 @@ void DebugServices::CheckWatchpoints(std::vector *name, std::vector std::vector parameter_list = {}; if (wp.condition.type == IS_OVERFLOW) { is_hit = (std::find(op_overflows.begin(), op_overflows.end(), tensor_name_no_slot) != op_overflows.end()); - } else { + } else if (base_summary_ptr != nullptr) { auto item = base_summary_ptr->IsWatchpointHit(wp); is_hit = std::get<0>(item); error_code = std::get<1>(item); diff --git a/mindspore/ccsrc/debug/debug_services.h b/mindspore/ccsrc/debug/debug_services.h index 14e1612dcf..fd34390811 100644 --- a/mindspore/ccsrc/debug/debug_services.h +++ b/mindspore/ccsrc/debug/debug_services.h @@ -80,6 +80,8 @@ class DebugServices { if (std::isnan(actualValue)) return; actual_value = actualValue; + // if cannot extract inequality type from watchpoint + // try extract from parameter name if (inequality_type.empty()) { auto pos = name.find_last_of('_'); if (pos != std::string::npos) { diff --git a/mindspore/ccsrc/debug/debugger/tensor_summary.cc b/mindspore/ccsrc/debug/debugger/tensor_summary.cc index 4cdeb1564a..86cf9e6241 100644 --- 
a/mindspore/ccsrc/debug/debugger/tensor_summary.cc +++ b/mindspore/ccsrc/debug/debugger/tensor_summary.cc @@ -135,8 +135,9 @@ std::tuple> TensorSummary: bool hit = false; std::bitset<32> error_code; CONDITION_TYPE type = wp.condition.type; - + // bit 0 denotes presence of nan error_code.set(0, nan_count > 0); + // bit 1 denotes presence of inf error_code.set(1, inf_count > 0); if (type == CONDITION_TYPE::HAS_NAN) { @@ -150,20 +151,28 @@ std::tuple> TensorSummary: hit = (nan_count + inf_count) > 0; } else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr && error_code.none()) { hit = all_close[wp.id]->IsAllClose(); + } else if ((type == CONDITION_TYPE::NOT_CHANGED || type == CONDITION_TYPE::CHANGE_TOO_LARGE || + type == CONDITION_TYPE::CHANGE_TOO_SMALL) && + !prev_tensor_ptr) { + // bit 2 denotes absence of previous tensor + error_code.set(2, true); } - for (auto &parameter : parameter_list) { - if (parameter.disabled || error_code.any()) { - continue; - } - std::string inequality_type; - if (wp.is_gt_wp()) { - inequality_type = "gt"; - } else if (wp.is_lt_wp()) { - inequality_type = "lt"; + if (error_code.none()) { + for (auto &parameter : parameter_list) { + if (parameter.disabled || error_code.any()) { + continue; + } + // extract inequality type from watchpoint for backward compatibility + std::string inequality_type; + if (wp.is_gt_wp()) { + inequality_type = "gt"; + } else if (wp.is_lt_wp()) { + inequality_type = "lt"; + } + parameter.Evaluate(StatLookup(parameter.name, wp), inequality_type); + hit |= parameter.hit; } - parameter.Evaluate(StatLookup(parameter.name, wp), inequality_type); - hit |= parameter.hit; } return std::make_tuple(hit, static_cast(error_code.to_ulong()), parameter_list); } @@ -188,11 +197,17 @@ double_t TensorSummary::StatLookup(const std::string &parameter_name, const D } else if (param_type == "sd") { return current_mean_variance.GetStandardDeviation(); } else if (param_type == "abs_mean") { - return means["abs_current_mean"]->GetMean(); -
} else if (param_type == "abs_mean_update_ratio") { - return means["curr_prev_diff_mean"]->GetMean() / (means["abs_prev_mean"]->GetMean() + epsilon); + if (means.find("abs_current_mean") != means.end()) { + return means["abs_current_mean"]->GetMean(); + } + } else if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr) { + if (means.find("curr_prev_diff_mean") != means.end() && means.find("abs_prev_mean") != means.end()) { + return means["curr_prev_diff_mean"]->GetMean() / (means["abs_prev_mean"]->GetMean() + epsilon); + } } else if (param_type == "range_percentage") { - return range_counts[wp.id]->GetPercentInRange(); + if (range_counts.find(wp.id) != range_counts.end()) { + return range_counts[wp.id]->GetPercentInRange(); + } } else if (param_type == "zero_percentage") { return GetZeroValPercent(); }