You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

debug_services.cc 13 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
#include "debug/debug_services.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
  17. namespace mindspore {
  18. DebugServices::DebugServices() {
  19. tensor_loader_ = new TensorLoader();
  20. uint32_t iter_num = -1;
  21. tensor_loader_->set_iter_num(iter_num);
  22. }
// Copy constructor.
// NOTE(review): this copies the raw tensor_loader_ pointer, so the copy and
// the original alias the same TensorLoader while the destructor does a
// `delete` — looks like a double-free hazard if both instances are destroyed;
// confirm the intended ownership model.
DebugServices::DebugServices(const DebugServices &other) {
  tensor_loader_ = other.tensor_loader_;
  watchpoint_table = other.watchpoint_table;
}
// Copy assignment with self-assignment guard.
// NOTE(review): like the copy constructor, this aliases the raw
// tensor_loader_ pointer and also drops this object's previous loader without
// deleting it — potential leak plus double-free hazard; confirm ownership.
DebugServices &DebugServices::operator=(const DebugServices &other) {
  if (this != &other) {
    tensor_loader_ = other.tensor_loader_;
    watchpoint_table = other.watchpoint_table;
  }
  return *this;
}
// Destructor: releases the TensorLoader allocated in the default constructor.
// NOTE(review): instances created via copy construction/assignment share this
// pointer, so destroying both would delete it twice — confirm ownership.
DebugServices::~DebugServices() { delete tensor_loader_; }
  35. void DebugServices::AddWatchpoint(unsigned int id, unsigned int watch_condition,
  36. const std::vector<std::tuple<std::string, bool>> &check_node_list) {
  37. std::lock_guard<std::mutex> lg(lock_);
  38. watchpoint_t watchpoint_item;
  39. watchpoint_item.id = id;
  40. if (watch_condition == 0) {
  41. watchpoint_item.conditions.nan.enabled = true;
  42. } else if (watch_condition == 1) {
  43. watchpoint_item.conditions.inf.enabled = true;
  44. watchpoint_item.conditions.neg_inf.enabled = true;
  45. } else if (watch_condition == 2) {
  46. watchpoint_item.conditions.overflow.enabled = true;
  47. }
  48. watchpoint_item.check_node_list = check_node_list;
  49. watchpoint_table[id] = watchpoint_item;
  50. }
  51. void DebugServices::RemoveWatchpoint(unsigned int id) {
  52. std::lock_guard<std::mutex> lg(lock_);
  53. watchpoint_table.erase(id);
  54. }
  55. void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot,
  56. std::vector<int> *condition, std::vector<unsigned int> *watchpoint_id,
  57. const std::vector<std::string> &op_overflows) {
  58. std::lock_guard<std::mutex> lg(lock_);
  59. std::vector<std::shared_ptr<TensorData>> tensor_list = tensor_loader_->GetTensor();
  60. std::string current_tensor_name;
  61. std::unordered_map<unsigned int, watchpoint_t> watchpoints_to_check_table;
  62. const size_t location = 0;
  63. for (std::size_t i = 0; i < tensor_list.size(); i++) {
  64. current_tensor_name = tensor_list[i]->GetName();
  65. std::string tensor_slot = std::to_string(tensor_list[i]->GetSlot());
  66. mindspore::tensor::TensorPtr tensor_ptr = tensor_list[i]->GetTensor();
  67. int tensor_data_type = tensor_ptr->data_type_c();
  68. // check if we need to analyze this node and for which watchpoints we will check
  69. // create a list of watchpoints to check
  70. watchpoints_to_check_table.clear();
  71. for (auto w_table_item : watchpoint_table) {
  72. // if the watchpoint is checking for a nan or inf and the current tensor is not of a float type, then
  73. // don't check the watchpoint for this tensor
  74. if (std::get<1>(w_table_item).conditions.inf.enabled || std::get<1>(w_table_item).conditions.neg_inf.enabled ||
  75. std::get<1>(w_table_item).conditions.nan.enabled) {
  76. if (tensor_data_type != kNumberTypeFloat16 && tensor_data_type != kNumberTypeFloat &&
  77. tensor_data_type != kNumberTypeFloat32 && tensor_data_type != kNumberTypeFloat64) {
  78. continue;
  79. }
  80. }
  81. auto check_node_list = std::get<1>(w_table_item).check_node_list;
  82. for (auto check_node : check_node_list) {
  83. std::string w_name = std::get<0>(check_node);
  84. bool w_type = std::get<1>(check_node);
  85. // check if the current node tensor name is included the watchpoint
  86. std::string current_node_name = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
  87. if ((w_type == true && (current_tensor_name.find(w_name) == location || w_name == "*")) ||
  88. (w_type == false && current_node_name == w_name)) {
  89. watchpoints_to_check_table[w_table_item.second.id] = w_table_item.second;
  90. break;
  91. }
  92. }
  93. }
  94. std::vector<unsigned int> hit_encountered;
  95. // handle watchpoint conditions that do not require per element checks
  96. for (auto it_w_table_check = watchpoints_to_check_table.begin();
  97. it_w_table_check != watchpoints_to_check_table.end(); ++it_w_table_check) {
  98. if (it_w_table_check->second.conditions.overflow.enabled) {
  99. std::string name_no_slot = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
  100. if (std::find(op_overflows.begin(), op_overflows.end(), name_no_slot) != op_overflows.end()) {
  101. hit_encountered.push_back(it_w_table_check->second.id);
  102. }
  103. }
  104. }
  105. if (hit_encountered.size()) {
  106. HandleWatchpointHits(hit_encountered, name, slot, condition, watchpoint_id, current_tensor_name,
  107. &watchpoints_to_check_table, tensor_slot);
  108. hit_encountered.clear();
  109. }
  110. // need to add support for float16 and float64, and other types when we support conditions beyond inf and nan
  111. if (tensor_data_type != kNumberTypeFloat && tensor_data_type != kNumberTypeFloat32) {
  112. continue;
  113. }
  114. // check if no watchpoints are remaining
  115. if (watchpoints_to_check_table.empty()) {
  116. continue;
  117. }
  118. float *start_addr = reinterpret_cast<float *>(tensor_ptr->data_c());
  119. unsigned int num_elements = (tensor_ptr->data().nbytes()) / sizeof(float);
  120. std::unordered_map<unsigned int, watchpoint_t>::iterator it_w_table_check;
  121. for (unsigned int index = 0; index < num_elements; index++) {
  122. float x = start_addr[index];
  123. it_w_table_check = watchpoints_to_check_table.begin();
  124. while (it_w_table_check != watchpoints_to_check_table.end()) {
  125. if ((it_w_table_check->second.conditions.inf.enabled || it_w_table_check->second.conditions.neg_inf.enabled) &&
  126. isinf(x)) {
  127. hit_encountered.push_back(it_w_table_check->second.id);
  128. } else if (it_w_table_check->second.conditions.nan.enabled && isnan(x)) {
  129. hit_encountered.push_back(it_w_table_check->second.id);
  130. }
  131. ++it_w_table_check;
  132. }
  133. if (hit_encountered.size()) {
  134. HandleWatchpointHits(hit_encountered, name, slot, condition, watchpoint_id, current_tensor_name,
  135. &watchpoints_to_check_table, tensor_slot);
  136. hit_encountered.clear();
  137. }
  138. if (watchpoints_to_check_table.empty()) {
  139. break;
  140. }
  141. }
  142. }
  143. }
  144. void DebugServices::HandleWatchpointHits(const std::vector<unsigned int> &hit_encountered,
  145. std::vector<std::string> *name, std::vector<std::string> *slot,
  146. std::vector<int> *condition, std::vector<unsigned int> *watchpoint_id,
  147. std::string current_tensor_name,
  148. std::unordered_map<unsigned int, watchpoint_t> *watchpoints_to_check_table,
  149. std::string tensor_slot) {
  150. for (auto it_hit_id = hit_encountered.begin(); it_hit_id != hit_encountered.end(); ++it_hit_id) {
  151. if (watchpoint_table.find(*it_hit_id) != watchpoint_table.end()) {
  152. std::string name_no_slot = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
  153. name->push_back(name_no_slot);
  154. slot->push_back(tensor_slot);
  155. int condition_item = -1;
  156. if (watchpoint_table[*it_hit_id].conditions.nan.enabled) {
  157. condition_item = 0;
  158. } else if (watchpoint_table[*it_hit_id].conditions.inf.enabled ||
  159. watchpoint_table[*it_hit_id].conditions.neg_inf.enabled) {
  160. condition_item = 1;
  161. } else if (watchpoint_table[*it_hit_id].conditions.overflow.enabled) {
  162. condition_item = 2;
  163. }
  164. condition->push_back(condition_item);
  165. watchpoint_id->push_back(*it_hit_id);
  166. }
  167. watchpoints_to_check_table->erase(*it_hit_id);
  168. }
  169. }
// Checks one tensor against the registered watchpoints; on a NaN/Inf hit the
// output parameters are filled with the hit details.
// NOTE(review): if several watchpoints match this tensor, the search below
// keeps the LAST match — the inner `break` only exits the check-node loop
// while the outer loop keeps overwriting watchpoint_to_check. Confirm whether
// first-match semantics were intended.
// NOTE(review): if nothing matches, watchpoint_to_check stays
// default-constructed yet the element scan still runs; presumably all
// condition flags default to disabled so nothing is reported — verify.
// NOTE(review): "wacthpoint_id" is a typo for "watchpoint_id", but renaming
// requires touching the header declaration.
void DebugServices::CheckSingleWatchpoint(std::shared_ptr<TensorData> watchtensor, std::string *name, std::string *slot,
                                          char **data_ptr, unsigned int *data_size, int *condition,
                                          unsigned int *wacthpoint_id) {
  std::lock_guard<std::mutex> lg(lock_);
  std::string current_watchtensor_name;
  current_watchtensor_name = watchtensor->GetName();
  mindspore::tensor::TensorPtr tensor_ptr = watchtensor->GetTensor();
  int tensor_data_type = tensor_ptr->data_type_c();
  watchpoint_t watchpoint_to_check;
  for (auto w_table_item : watchpoint_table) {
    auto check_node_list = std::get<1>(w_table_item).check_node_list;
    for (auto check_node : check_node_list) {
      std::string w_name = std::get<0>(check_node);
      bool w_type = std::get<1>(check_node);
      // get current the full info including condition, id..., for current watchtensor
      std::string current_node_name = current_watchtensor_name.substr(0, current_watchtensor_name.find_first_of(":"));
      if ((w_type == true && (current_watchtensor_name.find(w_name) != string::npos || w_name == "*")) ||
          (w_type == false && current_node_name == w_name)) {
        watchpoint_to_check = w_table_item.second;
        // need to add support for float16 and float64, and other types when we support conditions beyond inf and nan
        if (tensor_data_type != kNumberTypeFloat && tensor_data_type != kNumberTypeFloat32) {
          return;
        }
        break;
      }
    }
  }
  // Scan every element; the outputs are overwritten on each hit, so the values
  // reported correspond to the last hit element.
  float *start_addr = reinterpret_cast<float *>(tensor_ptr->data_c());
  unsigned int num_elements = (tensor_ptr->data().nbytes()) / sizeof(float);
  for (unsigned int index = 0; index < num_elements; index++) {
    float x = start_addr[index];
    if (((watchpoint_to_check.conditions.inf.enabled || watchpoint_to_check.conditions.neg_inf.enabled) && isinf(x)) ||
        (watchpoint_to_check.conditions.nan.enabled && isnan(x))) {
      std::string name_no_slot = current_watchtensor_name.substr(0, current_watchtensor_name.find_first_of(":"));
      *name = name_no_slot;
      *slot = std::to_string(watchtensor->GetSlot());
      *data_ptr = reinterpret_cast<char *>(tensor_ptr->data_c());
      *data_size = tensor_ptr->data().nbytes();
      int condition_item = -1;
      if (watchpoint_to_check.conditions.nan.enabled) {
        condition_item = 0;
      } else if (watchpoint_to_check.conditions.inf.enabled || watchpoint_to_check.conditions.neg_inf.enabled) {
        condition_item = 1;
      }
      *condition = condition_item;
      *wacthpoint_id = watchpoint_to_check.id;
    }
  }
}
  219. void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
  220. std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
  221. std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape) {
  222. std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> result_list;
  223. tensor_loader_->SearchTensors(name, &result_list);
  224. for (auto result : result_list) {
  225. if (!std::get<1>(result)) {
  226. continue;
  227. }
  228. ret_name->push_back(std::get<0>(result));
  229. data_ptr->push_back(reinterpret_cast<char *>(std::get<1>(result)->GetTensor()->data_c()));
  230. data_size->push_back(std::get<1>(result)->GetTensor()->data().nbytes());
  231. dtype->push_back(std::get<1>(result)->GetTensor()->Dtype());
  232. shape->push_back(std::get<1>(result)->GetTensor()->shape());
  233. }
  234. }
  235. bool DebugServices::IsWatchPoint(std::string kernel_name,
  236. std::unordered_map<unsigned int, watchpoint_t> watchpoint_table) {
  237. bool ret = false;
  238. for (auto w_table_item : watchpoint_table) {
  239. auto check_node_list = std::get<1>(w_table_item).check_node_list;
  240. for (auto check_node : check_node_list) {
  241. std::string w_name = std::get<0>(check_node);
  242. bool w_type = std::get<1>(check_node);
  243. if ((w_type == true &&
  244. ((kernel_name.find(w_name) != string::npos && kernel_name.rfind(w_name, 0) == 0) || w_name == "*")) ||
  245. (w_type == false && kernel_name == w_name)) {
  246. ret = true;
  247. return ret;
  248. }
  249. }
  250. }
  251. return ret;
  252. }
// Accessor for the owned TensorLoader; the caller must not delete it.
TensorLoader *DebugServices::tensor_loader() const { return tensor_loader_; }
// Returns a snapshot COPY of the watchpoint table; mutations on the returned
// map do not affect this object.
// NOTE(review): the read is not guarded by lock_, unlike the mutators above —
// confirm callers only use this from the same thread as Add/RemoveWatchpoint.
std::unordered_map<unsigned int, DebugServices::watchpoint_t> DebugServices::GetWatchpointTable() {
  return watchpoint_table;
}
  257. } // namespace mindspore