Browse Source

support watchpoints on weights and bias, add support advanced watchpoints

tags/v1.1.0
Harshvardhan Gupta 5 years ago
parent
commit
727d424553
7 changed files with 384 additions and 68 deletions
  1. +221
    -23
      mindspore/ccsrc/debug/debug_services.cc
  2. +68
    -12
      mindspore/ccsrc/debug/debug_services.h
  3. +25
    -11
      mindspore/ccsrc/debug/debugger/debug_grpc.proto
  4. +56
    -16
      mindspore/ccsrc/debug/debugger/debugger.cc
  5. +10
    -4
      mindspore/ccsrc/debug/debugger/debugger.h
  6. +2
    -0
      mindspore/ccsrc/debug/tensor_load.h
  7. +2
    -2
      mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc

+ 221
- 23
mindspore/ccsrc/debug/debug_services.cc View File

@@ -14,6 +14,8 @@
* limitations under the License.
*/
#include <algorithm>
#include <map>
#include "backend/session/anf_runtime_algorithm.h"
#include "debug/debug_services.h"
namespace mindspore {

@@ -39,17 +41,19 @@ DebugServices &DebugServices::operator=(const DebugServices &other) {
DebugServices::~DebugServices() { delete tensor_loader_; }

void DebugServices::AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter,
const std::vector<std::tuple<std::string, bool>> &check_node_list) {
const std::vector<std::tuple<std::string, bool>> &check_node_list,
const std::vector<parameter_t> &parameter_list) {
std::lock_guard<std::mutex> lg(lock_);

watchpoint_t watchpoint_item;
watchpoint_item.id = id;
watchpoint_item.condition.type = static_cast<CONDITION_TYPE>(watch_condition);
watchpoint_item.condition.parameter = parameter;
if (watch_condition > 2)
// odd indices are greater than conditions and even indices are less than
if (watch_condition > 2 && watch_condition < 13)
// odd indices are greater than conditions and even indices are less than
watchpoint_item.condition.comparison = (watch_condition & 1) == 0 ? "LT" : "GT";
watchpoint_item.check_node_list = check_node_list;
watchpoint_item.parameter_list = parameter_list;
watchpoint_table[id] = watchpoint_item;
}

@@ -59,11 +63,22 @@ void DebugServices::RemoveWatchpoint(unsigned int id) {
}

template <typename T>
DebugServices::tensor_stats DebugServices::SummarizeTensor(const T *start, unsigned int n, bool need_min_max,
bool need_mean_sd) {
DebugServices::tensor_stats DebugServices::SummarizeTensor(const T *start, const T *start_prev, unsigned int n,
bool need_min_max, bool need_mean_sd,
bool need_zero_percentage,
bool need_tensor_update_ratio_mean, bool need_allclose) {
tensor_stats stats;
double zero_count = 0.0;
double rtol = 1.0e-5;
double atol = 1.0e-8;
double update_ratio_sum = 0.0;
double epsilon = 1.0e-9;
for (unsigned int i = 0; i < n; ++i) {
auto val = static_cast<double>(start[i]);
double val_prev = 0.0;
if (start_prev) {
val_prev = static_cast<double>(start_prev[i]);
}
stats.has_nan = stats.has_nan || std::isnan(val);
stats.has_inf = stats.has_inf || std::isinf(val);
if (stats.has_inf && stats.has_nan) {
@@ -81,15 +96,33 @@ DebugServices::tensor_stats DebugServices::SummarizeTensor(const T *start, unsig
stats.mean += delta / (i + 1);
stats.m2 += delta * (val - stats.mean);
}

if (need_zero_percentage) {
if (val == 0) zero_count++;
}

if (need_tensor_update_ratio_mean && start_prev) {
update_ratio_sum += (std::abs(val) / (epsilon + std::abs(val_prev)));
}

if (need_allclose && start_prev) {
stats.allclose &= (std::abs(val - val_prev) <= (atol + rtol * std::abs(val_prev)));
}
}
if (need_tensor_update_ratio_mean && start_prev) {
stats.tensor_update_ratio_mean = (update_ratio_sum / n);
}
stats.zero_percentage = (zero_count / n) * 100;
stats.n = n;
return stats;
}

void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot,
std::vector<int> *condition, std::vector<unsigned int> *watchpoint_id,
std::vector<std::vector<parameter_t>> *parameters,
const std::vector<std::string> &op_overflows,
const std::vector<std::shared_ptr<TensorData>> &tensor_list) {
const std::vector<std::shared_ptr<TensorData>> &tensor_list,
const bool init_dbg_suspend) {
std::lock_guard<std::mutex> lg(lock_);
if (watchpoint_table.empty()) {
return;
@@ -102,79 +135,145 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
mindspore::tensor::TensorPtr tensor_ptr = tensor->GetTensor();
int tensor_dtype = tensor_ptr->data_type_c();
std::vector<unsigned int> hit_encountered;
std::vector<std::vector<bool>> hit_parms;
std::unordered_map<unsigned int, watchpoint_t> watchpoints_to_check_table;
bool min_max_enabled = false;
bool mean_sd_enabled = false;
bool inf_nan_enabled = false;
bool zero_percentage_enabled = false;
bool tensor_update_ratio_mean_enabled = false;
bool allclose_enabled = false;
for (auto w_table_item : watchpoint_table) {
auto wp = std::get<1>(w_table_item);
if (wp.condition.type == INIT && !init_dbg_suspend) continue;
if (wp.condition.type != IS_OVERFLOW && tensor_dtype == kNumberTypeBool) continue;
if (wp.IsNodeIncluded(tensor_name_no_slot)) {
min_max_enabled |= wp.min_max_enabled();
mean_sd_enabled |= wp.mean_sd_enabled();
inf_nan_enabled |= wp.inf_nan_enabled();
zero_percentage_enabled |= wp.zero_percentage_enabled();
tensor_update_ratio_mean_enabled |= wp.tensor_update_ratio_mean_enabled();
allclose_enabled |= wp.allclose_enabled();
watchpoints_to_check_table[w_table_item.second.id] = w_table_item.second;
}
}
tensor_stats stats;
uint num_elements = tensor_ptr->DataSize();
if (min_max_enabled || mean_sd_enabled || inf_nan_enabled) {
if (min_max_enabled || mean_sd_enabled || inf_nan_enabled || zero_percentage_enabled ||
tensor_update_ratio_mean_enabled || allclose_enabled) {
bool need_prev = (tensor_update_ratio_mean_enabled || allclose_enabled);
bool have_prev = tensor_loader_->GetPrevTensor(tensor_name) != NULL;
switch (tensor_dtype) {
case kNumberTypeUInt8: {
auto start_addr = reinterpret_cast<uint8_t *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<uint8_t *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
case kNumberTypeInt8: {
auto start_addr = reinterpret_cast<int8_t *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<int8_t *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
case kNumberTypeUInt16: {
auto start_addr = reinterpret_cast<uint16_t *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<uint16_t *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
case kNumberTypeInt16: {
auto start_addr = reinterpret_cast<int16_t *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<int16_t *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
case kNumberTypeUInt32: {
auto start_addr = reinterpret_cast<uint32_t *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<uint32_t *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
case kNumberTypeInt32:
case kNumberTypeInt: {
auto start_addr = reinterpret_cast<int32_t *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<int32_t *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
case kNumberTypeUInt64: {
auto start_addr = reinterpret_cast<uint64_t *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<uint64_t *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
case kNumberTypeInt64: {
auto start_addr = reinterpret_cast<int64_t *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<int64_t *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
case kNumberTypeFloat16: {
auto start_addr = reinterpret_cast<float16 *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<float16 *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
case kNumberTypeFloat32:
case kNumberTypeFloat: {
auto start_addr = reinterpret_cast<float *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<float *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
case kNumberTypeFloat64: {
auto start_addr = reinterpret_cast<double *>(tensor_ptr->data_c());
stats = SummarizeTensor(start_addr, num_elements, min_max_enabled, mean_sd_enabled);
auto start_addr_prev =
(need_prev && have_prev
? reinterpret_cast<double *>(tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c())
: NULL);
stats = SummarizeTensor(start_addr, start_addr_prev, num_elements, min_max_enabled, mean_sd_enabled,
zero_percentage_enabled, tensor_update_ratio_mean_enabled, allclose_enabled);
break;
}
default:
@@ -185,31 +284,97 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector

for (auto &it : watchpoints_to_check_table) {
auto wp_id = it.second.id;
std::vector<bool> hit_p;
CONDITION_TYPE enabled_condition = it.second.condition.type;
bool hit = (enabled_condition == HAS_NAN && stats.has_nan) || (enabled_condition == HAS_INF && stats.has_inf) ||
(enabled_condition == GENERAL_OVERFLOW && (stats.has_nan || stats.has_inf)) ||
(enabled_condition == IS_OVERFLOW &&
std::find(op_overflows.begin(), op_overflows.end(), tensor_name_no_slot) != op_overflows.end());

if (enabled_condition > 2) {
if (enabled_condition > 2 && enabled_condition != GENERAL_OVERFLOW) {
if (stats.has_inf || stats.has_nan) {
MS_LOG(WARNING) << "NaN or/and INF present in tensor: " << tensor_name << ". Cannot check "
<< condition_label[enabled_condition] << " watchpoint.";
} else {
} else if (enabled_condition < 13) {
bool gt = stats.statLookup(enabled_condition) > it.second.condition.parameter;
bool lt = stats.statLookup(enabled_condition) < it.second.condition.parameter;
hit |= it.second.condition.comparison == "GT" ? gt : lt;
} else {
std::vector<parameter_t> parameter_list_item = it.second.parameter_list;
for (auto &p : parameter_list_item) {
if (p.disabled == false) {
bool p_hit = false;
if (p.name == "zero_percentage_ge") {
p_hit = stats.parmLookup(STAT_ZERO_PERCENTAGE) >= p.value;
} else if (p.name == "max_gt") {
p_hit = stats.parmLookup(STAT_MAX) > p.value;
} else if (p.name == "max_lt") {
p_hit = stats.parmLookup(STAT_MAX) < p.value;
} else if (p.name == "min_gt") {
p_hit = stats.parmLookup(STAT_MIN) > p.value;
} else if (p.name == "min_lt") {
p_hit = stats.parmLookup(STAT_MIN) < p.value;
} else if (p.name == "mean_gt") {
p_hit = stats.parmLookup(STAT_MEAN) > p.value;
} else if (p.name == "mean_lt") {
p_hit = stats.parmLookup(STAT_MEAN) < p.value;
} else if (p.name == "abs_mean_gt") {
p_hit = std::abs(stats.parmLookup(STAT_MEAN)) > p.value;
} else if (p.name == "abs_mean_lt") {
p_hit = std::abs(stats.parmLookup(STAT_MEAN)) < p.value;
} else if (p.name == "abs_update_ratio_mean_gt") {
p_hit = stats.parmLookup(STAT_TENSOR_UPDATE_RATIO_MEAN) > p.value;
} else if (p.name == "abs_update_ratio_mean_lt") {
p_hit = stats.parmLookup(STAT_TENSOR_UPDATE_RATIO_MEAN) < p.value;
}
hit |= p_hit;
hit_p.push_back(p_hit);
} else {
hit_p.push_back(false);
}
}

hit |= (enabled_condition == NOT_CHANGED && stats.parmLookup(STAT_ALLCLOSE));

if (hit) hit_parms.push_back(hit_p);
}
}
if (hit) hit_encountered.push_back(wp_id);
}

unsigned int index_parm_list = 0;
for (auto it_hit_id = hit_encountered.begin(); it_hit_id != hit_encountered.end(); ++it_hit_id) {
if (watchpoint_table.find(*it_hit_id) != watchpoint_table.end()) {
name->push_back(tensor_name_no_slot);
// return fully qualified name for weights and bias to MI
auto found_dot = tensor_name_no_slot.find_last_of('.');
if (found_dot != std::string::npos && (tensor_name_no_slot.substr(found_dot + 1) == "weight" ||
tensor_name_no_slot.substr(found_dot + 1) == "bias")) {
auto check_node_list = watchpoint_table.find(*it_hit_id)->second.check_node_list;
for (auto check_node : check_node_list) {
std::string w_name = std::get<0>(check_node);
auto found_slash = w_name.find_last_of('/');
if (found_slash != std::string::npos && w_name.substr(found_slash + 1) == tensor_name_no_slot) {
name->push_back(w_name);
}
}
} else {
name->push_back(tensor_name_no_slot);
}

slot->push_back(tensor_slot);
int condition_item = watchpoint_table.find(*it_hit_id)->second.condition.type;
condition->push_back(condition_item);
watchpoint_id->push_back(*it_hit_id);
std::vector<parameter_t> parameter_list_item = watchpoint_table.find(*it_hit_id)->second.parameter_list;
if (condition_item >= 13) {
unsigned int index_hit_parm = 0;
for (auto &p : parameter_list_item) {
p.hit = hit_parms[index_parm_list][index_hit_parm];
index_hit_parm++;
}
index_parm_list++;
}
parameters->push_back(parameter_list_item);
}
watchpoints_to_check_table.erase(*it_hit_id);
}
@@ -234,7 +399,7 @@ void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector<
}
}

bool DebugServices::IsWatchPoint(std::string kernel_name) {
bool DebugServices::IsWatchPoint(std::string kernel_name, const CNodePtr &kernel) {
bool ret = false;
for (auto w_table_item : watchpoint_table) {
auto check_node_list = std::get<1>(w_table_item).check_node_list;
@@ -243,7 +408,7 @@ bool DebugServices::IsWatchPoint(std::string kernel_name) {
bool w_type = std::get<1>(check_node);
if ((w_type == true &&
((kernel_name.find(w_name) != string::npos && kernel_name.rfind(w_name, 0) == 0) || w_name == "*")) ||
(w_type == false && kernel_name == w_name)) {
(w_type == false && (kernel_name == w_name || IsWatchPointNodeInput(w_name, kernel)))) {
ret = true;
return ret;
}
@@ -252,6 +417,39 @@ bool DebugServices::IsWatchPoint(std::string kernel_name) {
return ret;
}

bool DebugServices::IsWatchPointNodeInput(std::string w_name, const CNodePtr &kernel) {
if (kernel) {
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
for (size_t j = 0; j < input_size; ++j) {
auto input_kernel = kernel->input(j + 1);
std::string input_kernel_name = input_kernel->fullname_with_scope();
auto found = w_name.find_last_of('/');
if (found != std::string::npos && w_name.substr(found + 1) == input_kernel_name) return true;
}
return false;
} else {
return false;
}
}

void DebugServices::AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list,
const CNodePtr &kernel) {
if (kernel) {
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
for (size_t j = 0; j < input_size; ++j) {
auto input_kernel = kernel->input(j + 1);
std::string input_kernel_name = input_kernel->fullname_with_scope();
std::string locate_tensor = input_kernel_name + ":0";
std::map<std::string, std::shared_ptr<TensorData>> tensor_map = tensor_loader_->GetTensorMap();
std::map<std::string, std::shared_ptr<TensorData>>::iterator iter;
iter = tensor_map.find(locate_tensor);
if (iter != tensor_map.end()) {
tensor_list->push_back(iter->second);
}
}
}
}

TensorLoader *DebugServices::tensor_loader() const { return tensor_loader_; }
std::unordered_map<unsigned int, DebugServices::watchpoint_t> DebugServices::GetWatchpointTable() {
return watchpoint_table;


+ 68
- 12
mindspore/ccsrc/debug/debug_services.h View File

@@ -52,19 +52,37 @@ class DebugServices {
MEAN_GT,
MEAN_LT,
SD_GT,
SD_LT
SD_LT,
GENERAL_OVERFLOW,
INIT,
TOO_LARGE,
TOO_SMALL,
ALL_ZERO,
CHANGE_TOO_LARGE,
CHANGE_TOO_SMALL,
NOT_CHANGED
};

enum STAT_TYPE { STAT_MIN, STAT_MAX, STAT_MEAN, STAT_ZERO_PERCENTAGE, STAT_TENSOR_UPDATE_RATIO_MEAN, STAT_ALLCLOSE };

typedef struct condition {
CONDITION_TYPE type;
float parameter = 0;
std::string comparison;
} condition_t;

typedef struct parameter {
std::string name;
bool disabled;
double_t value;
bool hit;
} parameter_t;

typedef struct watchpoint {
unsigned int id;
condition_t condition;
std::vector<std::tuple<std::string, bool>> check_node_list;
std::vector<parameter_t> parameter_list;
size_t location = 0;

bool IsNodeIncluded(const std::string &tensor_name) {
@@ -72,6 +90,8 @@ class DebugServices {
for (auto check_node : check_node_list) {
std::string w_name = std::get<0>(check_node);
bool w_type = std::get<1>(check_node);
auto found = w_name.find_last_of('/');
if (found != std::string::npos && w_name.substr(found + 1) == tensor_name) return true;
if ((w_type && (tensor_name.find(w_name) == location || w_name == "*")) || (!w_type && node_name == w_name)) {
return true;
}
@@ -81,15 +101,27 @@ class DebugServices {

bool min_max_enabled() {
return condition.type == MAX_LT || condition.type == MAX_GT || condition.type == MIN_LT ||
condition.type == MIN_GT || condition.type == MAX_MIN_LT || condition.type == MAX_MIN_GT;
condition.type == MIN_GT || condition.type == MAX_MIN_LT || condition.type == MAX_MIN_GT ||
(condition.type == INIT && (!parameter_list[1].disabled || !parameter_list[2].disabled)) ||
(condition.type == TOO_LARGE && (!parameter_list[1].disabled || !parameter_list[2].disabled)) ||
(condition.type == TOO_SMALL && (!parameter_list[1].disabled || !parameter_list[2].disabled));
}
// inf or nan related condition set
bool inf_nan_enabled() { return condition.type == HAS_INF || condition.type == HAS_NAN; }
bool inf_nan_enabled() {
return condition.type == HAS_INF || condition.type == HAS_NAN || condition.type == GENERAL_OVERFLOW;
}
// mean or sd related condition set
bool mean_sd_enabled() {
return condition.type == MEAN_LT || condition.type == MEAN_GT || condition.type == SD_LT ||
condition.type == SD_GT;
condition.type == SD_GT ||
(condition.type == TOO_LARGE && (!parameter_list[0].disabled || !parameter_list[3].disabled)) ||
(condition.type == TOO_SMALL && (!parameter_list[0].disabled || !parameter_list[3].disabled));
}
bool zero_percentage_enabled() { return condition.type == ALL_ZERO || condition.type == INIT; }
bool tensor_update_ratio_mean_enabled() {
return condition.type == CHANGE_TOO_LARGE || condition.type == CHANGE_TOO_SMALL;
}
bool allclose_enabled() { return condition.type == NOT_CHANGED; }
} watchpoint_t;

struct tensor_stats {
@@ -100,6 +132,9 @@ class DebugServices {
unsigned int n = 0;
double mean = 0.0;
double m2 = 0.0;
double zero_percentage = 0.0;
double tensor_update_ratio_mean = -1;
bool allclose = false;

double statLookup(CONDITION_TYPE type) const {
if (type == MAX_GT || type == MAX_LT) return max;
@@ -110,6 +145,16 @@ class DebugServices {
return std::numeric_limits<double>::quiet_NaN();
}

double parmLookup(STAT_TYPE type) const {
if (type == STAT_MAX) return max;
if (type == STAT_MIN) return min;
if (type == STAT_MEAN) return mean;
if (type == STAT_ZERO_PERCENTAGE) return zero_percentage;
if (type == STAT_TENSOR_UPDATE_RATIO_MEAN) return tensor_update_ratio_mean;
if (type == STAT_ALLCLOSE) return allclose;
return std::numeric_limits<double>::quiet_NaN();
}

double getMean() const { return mean; }

double getVariance() const {
@@ -124,19 +169,25 @@ class DebugServices {
};

void AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter,
const std::vector<std::tuple<std::string, bool>> &check_node_list);
const std::vector<std::tuple<std::string, bool>> &check_node_list,
const std::vector<parameter_t> &parameter_list);

void RemoveWatchpoint(unsigned int id);

void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition,
std::vector<unsigned int> *watchpoint_id, const std::vector<std::string> &op_overflows,
const std::vector<std::shared_ptr<TensorData>> &tensor_list);
std::vector<unsigned int> *watchpoint_id, std::vector<std::vector<parameter_t>> *parameters,
const std::vector<std::string> &op_overflows,
const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend);

void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape);

bool IsWatchPoint(std::string kernel_name);
bool IsWatchPoint(std::string kernel_name, const CNodePtr &kernel = nullptr);

bool IsWatchPointNodeInput(std::string w_name, const CNodePtr &kernel);

void AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list, const CNodePtr &kernel);

TensorLoader *tensor_loader() const;

@@ -146,14 +197,19 @@ class DebugServices {
std::mutex lock_;

std::unordered_map<unsigned int, watchpoint_t> watchpoint_table;
std::vector<std::string> condition_label = {"HAS_NAN", "HAS_INF", "IS_OVERFLOW", "MAX_GT", "MAX_LT",
"MIN_GT", "MIN_LT", "MAX_MIN_GT", "MAX_MIN_LT", "MEAN_GT",
"MEAN_LT", "SD_GT", "SD_LT"};
std::vector<std::string> condition_label = {
"HAS_NAN", "HAS_INF", "IS_OVERFLOW", "MAX_GT", "MAX_LT",
"MIN_GT", "MIN_LT", "MAX_MIN_GT", "MAX_MIN_LT", "MEAN_GT",
"MEAN_LT", "SD_GT", "SD_LT", "GENERAL_OVERFLOW", "INIT",
"TOO_LARGE", "TOO_SMALL", "ALL_ZERO", "CHANGE_TOO_LARGE", "CHANGE_TOO_SMALL",
"NOT_CHANGED"};

TensorLoader *tensor_loader_;

template <typename T>
static tensor_stats SummarizeTensor(const T *start, unsigned int n, bool need_min_max, bool need_mean_sd);
static tensor_stats SummarizeTensor(const T *start, const T *start_prev, unsigned int n, bool need_min_max,
bool need_mean_sd, bool need_zero_percentage, bool need_tensor_update_ratio_mean,
bool need_allclose);
};
} // namespace mindspore



+ 25
- 11
mindspore/ccsrc/debug/debugger/debug_grpc.proto View File

@@ -36,11 +36,11 @@ message Metadata {
// the full name of current node
string cur_node = 4;
// check if training is done.
bool training_done = 5;
bool training_done = 5;
}

message Chunk {
bytes buffer = 1;
bytes buffer = 1;
}

message EventReply {
@@ -61,13 +61,13 @@ message EventReply {
}

message RunCMD {
// step level or node level. "step" or "node"
string run_level = 1;
oneof cmd {
int32 run_steps = 2;
// the next node full name
string node_name = 3;
}
// step level or node level. "step" or "node"
string run_level = 1;
oneof cmd {
int32 run_steps = 2;
// the next node full name
string node_name = 3;
}
}

message SetCMD {
@@ -96,10 +96,24 @@ message WatchCondition {
mean_lt = 10;
sd_gt = 11;
sd_lt = 12;
tensor_general_overflow = 13;
tensor_initialization = 14;
tensor_too_large = 15;
tensor_too_small = 16;
tensor_all_zero = 17;
tensor_change_too_large = 18;
tensor_change_too_small = 19;
tensor_not_changed = 20;
}
Condition condition = 1;
float value = 2; // for between condition, there will be two values
repeated bool include = 3; // for between condition, define the value is included or not
float value = 2;
message Parameter {
string name = 1;
bool disabled = 2;
double value = 3;
bool hit = 4; // Whether this parameter is hit when checking tensor.
}
repeated Parameter params = 4;
}

message WatchNode {


+ 56
- 16
mindspore/ccsrc/debug/debugger/debugger.cc View File

@@ -41,6 +41,7 @@ using debugger::TensorProto;
using debugger::WatchCondition;
using debugger::WatchCondition_Condition_inf;
using debugger::WatchCondition_Condition_nan;
using debugger::WatchCondition_Parameter;
using debugger::WatchNode;
using debugger::WatchpointHit;

@@ -67,7 +68,8 @@ Debugger::Debugger()
is_dataset_graph_(false),
partial_memory_(false),
last_overflow_bin_(0),
overflow_bin_path_("") {
overflow_bin_path_(""),
initial_suspend_(true) {
if (CheckDebuggerEnabled()) {
// configure partial memory reuse
partial_memory_ = CheckDebuggerPartialMemoryEnabled();
@@ -292,9 +294,9 @@ void Debugger::PostExecute() {
}
}

bool Debugger::ReadNodeDataRequired() {
bool Debugger::ReadNodeDataRequired(const CNodePtr &kernel) {
if (debugger_enabled_ && !is_dataset_graph_) {
auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_);
auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, kernel);
// if node has a watchpoint on it, is next_to node, or continue_to node then read the kernel tensor data
if (is_watchpoint || (run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_))) {
return true;
@@ -303,19 +305,19 @@ bool Debugger::ReadNodeDataRequired() {
return false;
}

void Debugger::PostExecuteNode() {
void Debugger::PostExecuteNode(const CNodePtr &kernel) {
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
if (pipeline::ExecutorPy::GetDebugTerminate()) {
return;
}
if (debugger_enabled_ && !is_dataset_graph_) {
auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_);
auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, kernel);

// if kernel is watchpoint,and get hit. suspend.
bool hit_empty_flag = true;
if (is_watchpoint) {
auto hits = CheckWatchpoints(cur_name_);
auto hits = CheckWatchpoints(cur_name_, kernel);
if (!hits.empty()) {
SendWatchpoints(hits);
CommandLoop();
@@ -477,6 +479,8 @@ void Debugger::CommandLoop() {
MS_LOG(INFO) << "rechecking all watchpoints";
SendWatchpoints(CheckWatchpoints());
} else {
// no longer the initial suspension.
initial_suspend_ = false;
// print run cmd content
// get run_level and node_name
run_level_ = GetRunLevel(reply);
@@ -494,10 +498,17 @@ void Debugger::CommandLoop() {
{
// print set cmd content
ProtoVector<WatchNode> recieved_nodes = GetWatchnodes(reply);
for (auto node : recieved_nodes) {
for (const auto &node : recieved_nodes) {
MS_LOG(INFO) << "node name: " << node.node_name();
MS_LOG(INFO) << "node type: " << node.node_type();
}

ProtoVector<WatchCondition_Parameter> parameters = GetParameters(reply);
for (const auto &parameter : parameters) {
MS_LOG(INFO) << "parameter name: " << parameter.name();
MS_LOG(INFO) << "parameter is disabled: " << parameter.disabled();
MS_LOG(INFO) << "parameter value: " << parameter.value();
}
MS_LOG(INFO) << "condition: " << GetWatchcondition(reply).condition();
MS_LOG(INFO) << "id: " << GetWatchpointID(reply);
MS_LOG(INFO) << "delete: " << GetWatchpointDelete(reply);
@@ -506,7 +517,7 @@ void Debugger::CommandLoop() {
if (GetWatchpointDelete(reply)) {
RemoveWatchpoint(GetWatchpointID(reply));
} else {
SetWatchpoint(GetWatchnodes(reply), GetWatchcondition(reply), GetWatchpointID(reply));
SetWatchpoint(GetWatchnodes(reply), GetWatchcondition(reply), GetWatchpointID(reply), GetParameters(reply));
}
break;
case DebuggerCommand::kViewCMD:
@@ -558,13 +569,25 @@ void AddTensorProtoInfo(TensorProto *tensor_item, TensorProto tensor) {
tensor_item->clear_dims();
}

void Debugger::SetWatchpoint(const ProtoVector<WatchNode> &nodes, const WatchCondition &condition, const int32_t id) {
void Debugger::SetWatchpoint(const ProtoVector<WatchNode> &nodes, const WatchCondition &condition, const int32_t id,
const ProtoVector<WatchCondition_Parameter> &parameters) {
std::vector<std::tuple<std::string, bool>> check_node_list;
std::vector<DebugServices::parameter_t> parameter_list;

std::transform(nodes.begin(), nodes.end(), std::back_inserter(check_node_list),
[](WatchNode node) -> std::tuple<std::string, bool> {
[](const WatchNode &node) -> std::tuple<std::string, bool> {
return make_tuple(node.node_name(), node.node_type() == "scope");
});
debug_services_->AddWatchpoint(id, condition.condition(), condition.value(), check_node_list);

std::transform(
parameters.begin(), parameters.end(), std::back_inserter(parameter_list),
[](const WatchCondition_Parameter &parameter) -> DebugServices::parameter_t {
return DebugServices::parameter_t{parameter.name(), parameter.disabled(), parameter.value(), parameter.hit()};
});
debug_services_->AddWatchpoint(id, condition.condition(), condition.value(), check_node_list, parameter_list);
if (initial_suspend_ &&
static_cast<DebugServices::CONDITION_TYPE>(condition.condition()) == DebugServices::CONDITION_TYPE::INIT)
SendWatchpoints(CheckWatchpoints());
}

void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->RemoveWatchpoint(id); }
@@ -637,12 +660,13 @@ void Debugger::Exit() {
}
}

std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode) {
std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel) {
std::vector<std::string> name;
std::vector<std::string> slot;
std::vector<int> condition;
std::vector<unsigned int> watchpoint_id;
std::vector<std::string> overflow_ops;
std::vector<std::vector<DebugServices::parameter_t>> parameters;
#ifdef ENABLE_D
overflow_ops = CheckOpOverflow();
#endif
@@ -652,12 +676,14 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode
tensor_list = tensor_loader->GetTensor();
} else {
tensor_list = tensor_loader->GetNodeTensorMap(watchnode);
debug_services_->AddWeightsBiasInputs(&tensor_list, kernel);
}
debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, overflow_ops, tensor_list);
debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, &parameters, overflow_ops, tensor_list,
initial_suspend_);
std::list<WatchpointHit> hits;
for (unsigned int i = 0; i < name.size(); i++) {
WatchpointHit hit;
std::vector<DebugServices::parameter_t> &parameter = parameters[i];
hit.set_id(watchpoint_id[i]);

// here TensorProto act as a tensor indicator, not sending tensor content
@@ -668,7 +694,13 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode

WatchCondition *condition_item = hit.mutable_watch_condition();
condition_item->set_condition(debugger::WatchCondition_Condition(condition[i]));

for (const auto &p : parameter) {
auto x = condition_item->mutable_params()->Add();
x->set_name(p.name);
x->set_disabled(p.disabled);
x->set_value(p.value);
x->set_hit(p.hit);
}
hits.push_back(hit);
}
return hits;
@@ -710,6 +742,14 @@ DebuggerCommand GetCommand(const EventReply &reply) {
return cmd;
}

// Extract the watch-condition parameter list carried by a SetCMD reply.
// Logs an error and returns an empty ProtoVector when the reply carries no
// set_cmd, or its set_cmd carries no watch_condition.
ProtoVector<WatchCondition_Parameter> GetParameters(const EventReply &reply) {
  const bool has_condition = reply.has_set_cmd() && reply.set_cmd().has_watch_condition();
  if (has_condition) {
    return reply.set_cmd().watch_condition().params();
  }
  MS_LOG(ERROR) << "Error: Can not get Parameters from command. Returning default value: ProtoVector<Parameter>().";
  return ProtoVector<WatchCondition_Parameter>();
}

ProtoVector<WatchNode> GetWatchnodes(const EventReply &reply) {
if (!reply.has_set_cmd()) {
MS_LOG(ERROR) << "Error: Not SetCMD, can not get WatchNodes. Returning default value: ProtoVector<WatchNode>().";
@@ -954,7 +994,7 @@ void Debugger::LoadGraphOutputs() {
std::string kernel_name = node->fullname_with_scope();
auto output_size = AnfAlgo::GetOutputTensorNum(node);
if (partial_memory_) {
if (!debug_services_->IsWatchPoint(kernel_name)) {
if (!debug_services_->IsWatchPoint(kernel_name, node)) {
continue;
}
}


+ 10
- 4
mindspore/ccsrc/debug/debugger/debugger.h View File

@@ -33,6 +33,7 @@ using debugger::GraphProto;
using debugger::ModelProto;
using debugger::TensorProto;
using debugger::WatchCondition;
using debugger::WatchCondition_Parameter;
using debugger::WatchNode;
using debugger::WatchpointHit;

@@ -73,9 +74,9 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// don't need a graph_ptr because it is saved during pre_execute
void PostExecute();

bool ReadNodeDataRequired();
bool ReadNodeDataRequired(const CNodePtr &kernel);

void PostExecuteNode();
void PostExecuteNode(const CNodePtr &kernel);

// suspend the execution after a debug_op
void PostDebugOp();
@@ -148,7 +149,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
void CommandLoop();

// set what nodes and conditions to watch
void SetWatchpoint(const ProtoVector<WatchNode> &nodes, const WatchCondition &condition, const int32_t id);
void SetWatchpoint(const ProtoVector<WatchNode> &nodes, const WatchCondition &condition, const int32_t id,
const ProtoVector<WatchCondition_Parameter> &parameters);

// remove watchpoint with id
void RemoveWatchpoint(const int32_t id);
@@ -161,7 +163,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

// analyze tensors and check watchpoint conditions
// return names of tensors and what condition they hit
std::list<WatchpointHit> CheckWatchpoints(const std::string &watchnode = std::string());
std::list<WatchpointHit> CheckWatchpoints(const std::string &watchnode = std::string(),
const CNodePtr &kernel = NULL);

// send watchpoints that hit
void SendWatchpoints(const std::list<WatchpointHit> &points);
@@ -192,6 +195,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
std::map<std::pair<uint32_t, uint32_t>, std::string> stream_task_to_opname_;
double last_overflow_bin_;
std::string overflow_bin_path_;
// flag to keep track of the very first suspension of debugger
bool initial_suspend_;
// singleton
static std::mutex instance_lock_;
static std::shared_ptr<Debugger> debugger_;
@@ -210,6 +215,7 @@ DataType GetDebuggerNumberDataType(const TypePtr &type);
DebuggerCommand GetCommand(const EventReply &reply);

// parse other data out of EventReply
ProtoVector<WatchCondition_Parameter> GetParameters(const EventReply &reply);
ProtoVector<WatchNode> GetWatchnodes(const EventReply &reply);
std::string GetNodeName(const EventReply &reply);
std::string GetRunLevel(const EventReply &reply);


+ 2
- 0
mindspore/ccsrc/debug/tensor_load.h View File

@@ -56,6 +56,8 @@ class TensorLoader {

// Return a by-value snapshot of the tensor-name -> TensorData map (callers get a copy).
std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map; }

// Fetch the previous-iteration copy of a tensor ("<name>:prev"), or nullptr if none was stored.
// Uses find() instead of operator[], which would silently default-insert a null
// entry into tensor_list_map on every miss and grow the cache on a read path.
std::shared_ptr<TensorData> GetPrevTensor(const std::string &tensor_name) {
  auto iter = tensor_list_map.find(tensor_name + ":prev");
  return iter == tensor_list_map.end() ? nullptr : iter->second;
}

std::vector<std::shared_ptr<TensorData>> GetNodeTensorMap(std::string node_name) {
std::vector<std::shared_ptr<TensorData>> tensors;
for (auto itr = node_tensor_map.begin(); itr != node_tensor_map.end(); itr++) {


+ 2
- 2
mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc View File

@@ -113,7 +113,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
read_data = true;
}
} else if (debugger->debugger_enabled()) {
read_data = debugger->ReadNodeDataRequired();
read_data = debugger->ReadNodeDataRequired(kernel);
}
if (!read_data) {
return;
@@ -168,7 +168,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
}
}
}
debugger->PostExecuteNode();
debugger->PostExecuteNode(kernel);
}
} // namespace



Loading…
Cancel
Save