You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

debug_services.cc 7.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
#include "debug/debug_services.h"

#include <cstdint>
  17. namespace mindspore {
  18. DebugServices::DebugServices() {
  19. tensor_loader_ = new TensorLoader();
  20. uint32_t iter_num = -1;
  21. tensor_loader_->set_iter_num(iter_num);
  22. }
// Copy constructor.
// NOTE(review): this copies the raw tensor_loader_ pointer, so the copy and
// the original share one TensorLoader while the destructor unconditionally
// deletes it — destroying both objects double-deletes. A deep copy or a
// std::shared_ptr member would fix this, but the member is declared in the
// header (outside this file), so only flagging it here.
DebugServices::DebugServices(const DebugServices &other) {
  tensor_loader_ = other.tensor_loader_;
  watchpoint_table = other.watchpoint_table;
}
// Copy assignment.
// NOTE(review): shallow-copies the raw tensor_loader_ pointer. This both
// leaks the TensorLoader this object previously owned (it is overwritten
// without delete) and makes two objects own the same pointer, which the
// destructor then double-deletes. The safe fix (shared_ptr or deep copy)
// requires a header change outside this file.
DebugServices &DebugServices::operator=(const DebugServices &other) {
  if (this != &other) {  // guard against self-assignment
    tensor_loader_ = other.tensor_loader_;
    watchpoint_table = other.watchpoint_table;
  }
  return *this;
}
// Releases the owned TensorLoader. NOTE(review): the copy operations above
// share this raw pointer between instances, so destroying two copies of a
// DebugServices double-deletes the loader.
DebugServices::~DebugServices() { delete tensor_loader_; }
  35. void DebugServices::add_watchpoint(unsigned int id, unsigned int watch_condition,
  36. const std::vector<std::tuple<std::string, bool>> &check_node_list) {
  37. std::lock_guard<std::mutex> lg(lock_);
  38. watchpoint_t watchpoint_item;
  39. watchpoint_item.id = id;
  40. if (watch_condition == 0) {
  41. watchpoint_item.conditions.nan.enabled = true;
  42. } else if (watch_condition == 1) {
  43. watchpoint_item.conditions.inf.enabled = true;
  44. watchpoint_item.conditions.neg_inf.enabled = true;
  45. }
  46. watchpoint_item.check_node_list = check_node_list;
  47. watchpoint_table[id] = watchpoint_item;
  48. }
  49. void DebugServices::remove_watchpoint(unsigned int id) {
  50. std::lock_guard<std::mutex> lg(lock_);
  51. watchpoint_table.erase(id);
  52. }
// Scans every tensor currently held by the tensor loader against all
// registered watchpoints and reports each hit by appending one entry to
// every output vector (the vectors stay index-aligned):
//   name          - node name (tensor name with the ":slot" suffix removed)
//   slot          - tensor slot, as a string
//   data_ptr      - raw pointer into the tensor's data buffer
//   data_size     - tensor data size in bytes
//   condition     - 0 = NaN hit, 1 = +/-inf hit, -1 = neither (shouldn't occur)
//   wacthpoint_id - id of the watchpoint that fired
//     NOTE(review): "wacthpoint_id" is a typo for "watchpoint_id", but the
//     name is part of the declared interface (header outside this file), so
//     it is kept as-is.
// Each watchpoint fires at most once per tensor: it is erased from the
// per-tensor check table after its first hit.
void DebugServices::check_watchpoints(std::vector<std::string> *name, std::vector<std::string> *slot,
                                      std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
                                      std::vector<int> *condition, std::vector<unsigned int> *wacthpoint_id) {
  std::lock_guard<std::mutex> lg(lock_);
  std::vector<std::shared_ptr<TensorData>> tensor_list = tensor_loader_->GetTensor();
  std::string current_tensor_name;
  // Per-tensor subset of watchpoint_table that is applicable to the tensor
  // being scanned; rebuilt (cleared) for every tensor.
  std::unordered_map<unsigned int, watchpoint_t> watchpoints_to_check_table;
  for (std::size_t i = 0; i < tensor_list.size(); i++) {
    current_tensor_name = tensor_list[i]->GetName();
    mindspore::tensor::TensorPtr tensor_ptr = tensor_list[i]->GetTensor();
    int tensor_data_type = tensor_ptr->data_type_c();
    // check if we need to analyze this node and for which watchpoints we will check
    // create a list of watchpoints to check
    watchpoints_to_check_table.clear();
    // NOTE(review): this loop copies each map entry, each check_node_list and
    // each tuple per iteration (const auto & would avoid the copies).
    for (auto w_table_item : watchpoint_table) {
      // if the watchpoint is checking for a nan or inf and the current tensor is not of a float type, then
      // don't check the watchpoint for this tensor
      if (std::get<1>(w_table_item).conditions.inf.enabled || std::get<1>(w_table_item).conditions.neg_inf.enabled ||
          std::get<1>(w_table_item).conditions.nan.enabled) {
        if (tensor_data_type != kNumberTypeFloat16 && tensor_data_type != kNumberTypeFloat &&
            tensor_data_type != kNumberTypeFloat32 && tensor_data_type != kNumberTypeFloat64) {
          continue;
        }
      }
      auto check_node_list = std::get<1>(w_table_item).check_node_list;
      for (auto check_node : check_node_list) {
        std::string w_name = std::get<0>(check_node);
        bool w_type = std::get<1>(check_node);
        // check if the current node tensor name is included the watchpoint
        // node name = tensor name up to (but excluding) the first ':'
        std::string current_node_name = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
        // w_type true  -> substring match on the full tensor name, or the "*" wildcard
        // w_type false -> exact match on the node name
        if ((w_type == true && (current_tensor_name.find(w_name) != string::npos || w_name == "*")) ||
            (w_type == false && current_node_name == w_name)) {
          watchpoints_to_check_table[w_table_item.second.id] = w_table_item.second;
          break;  // one matching node is enough; move on to the next watchpoint
        }
      }
    }
    // check if no watchpoints are valid for the current tensor
    if (watchpoints_to_check_table.empty()) {
      continue;
    }
    // need to add support for float16 and float64, and other types when we support conditions beyond inf and nan
    if (tensor_data_type != kNumberTypeFloat && tensor_data_type != kNumberTypeFloat32) {
      continue;
    }
    // Element-wise scan: reinterpret the buffer as floats (valid because only
    // kNumberTypeFloat/kNumberTypeFloat32 reach this point).
    float *start_addr = reinterpret_cast<float *>(tensor_ptr->data_c());
    unsigned int num_elements = (tensor_ptr->data().nbytes()) / sizeof(float);
    std::unordered_map<unsigned int, watchpoint_t>::iterator it_w_table_check;
    // Watchpoint ids that fired on the current element; drained after each element.
    std::vector<unsigned int> hit_encountered;
    for (unsigned int index = 0; index < num_elements; index++) {
      float x = start_addr[index];
      it_w_table_check = watchpoints_to_check_table.begin();
      while (it_w_table_check != watchpoints_to_check_table.end()) {
        // NOTE(review): unqualified isinf/isnan — prefer std::isinf/std::isnan
        // from <cmath> to avoid the C macro / ADL ambiguity. Also, isinf(x)
        // matches both +inf and -inf regardless of which of inf/neg_inf is
        // enabled — confirm this coarse matching is intended.
        if ((it_w_table_check->second.conditions.inf.enabled || it_w_table_check->second.conditions.neg_inf.enabled) &&
            isinf(x)) {
          hit_encountered.push_back(it_w_table_check->second.id);
        } else if (it_w_table_check->second.conditions.nan.enabled && isnan(x)) {
          hit_encountered.push_back(it_w_table_check->second.id);
        }
        ++it_w_table_check;
      }
      if (hit_encountered.size()) {
        // Record one output row per hit, then remove the watchpoint from the
        // per-tensor table so it cannot fire again on a later element.
        for (auto it_hit_id = hit_encountered.begin(); it_hit_id != hit_encountered.end(); ++it_hit_id) {
          std::string name_no_slot = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
          name->push_back(name_no_slot);
          slot->push_back(std::to_string(tensor_list[i]->GetSlot()));
          data_ptr->push_back(reinterpret_cast<char *>(tensor_ptr->data_c()));
          data_size->push_back(tensor_ptr->data().nbytes());
          // Map the hit back to a condition code from the registered watchpoint:
          // 0 = nan, 1 = inf/neg_inf, -1 = fallthrough (no condition enabled).
          int condition_item = -1;
          if (watchpoint_table[*it_hit_id].conditions.nan.enabled) {
            condition_item = 0;
          } else if (watchpoint_table[*it_hit_id].conditions.inf.enabled ||
                     watchpoint_table[*it_hit_id].conditions.neg_inf.enabled) {
            condition_item = 1;
          }
          condition->push_back(condition_item);
          wacthpoint_id->push_back(*it_hit_id);
          // Safe: we iterate hit_encountered (a separate vector), not the map
          // being erased from.
          watchpoints_to_check_table.erase(*it_hit_id);
        }
        hit_encountered.clear();
      }
      // All applicable watchpoints already fired for this tensor — stop scanning it.
      if (watchpoints_to_check_table.empty()) {
        break;
      }
    }
  }
}
  140. void DebugServices::read_nodes_tensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
  141. std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
  142. std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape) {
  143. std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> result_list;
  144. tensor_loader_->SearchTensors(name, &result_list);
  145. for (auto result : result_list) {
  146. if (!std::get<1>(result)) {
  147. continue;
  148. }
  149. ret_name->push_back(std::get<0>(result));
  150. data_ptr->push_back(reinterpret_cast<char *>(std::get<1>(result)->GetTensor()->data_c()));
  151. data_size->push_back(std::get<1>(result)->GetTensor()->data().nbytes());
  152. dtype->push_back(std::get<1>(result)->GetTensor()->Dtype());
  153. shape->push_back(std::get<1>(result)->GetTensor()->shape());
  154. }
  155. }
// Returns the TensorLoader owned by this object (non-owning pointer: callers
// must not delete it; it lives as long as this DebugServices).
TensorLoader *DebugServices::get_tensor_loader() const { return tensor_loader_; }
  157. } // namespace mindspore