You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

debug_services.h 8.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_
  17. #define MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_
  18. #include <math.h>
  19. #include <vector>
  20. #include <string>
  21. #include <memory>
  22. #include <tuple>
  23. #include <unordered_map>
  24. #include <mutex>
  25. #include <limits>
  26. #include "debug/tensor_load.h"
  27. #include "debug/tensor_data.h"
  28. #include "ir/dtype.h"
  29. namespace mindspore {
  30. class DebugServices {
  31. public:
  32. DebugServices();
  33. DebugServices(const DebugServices &other);
  34. DebugServices &operator=(const DebugServices &other);
  35. ~DebugServices();
  36. enum CONDITION_TYPE {
  37. HAS_NAN,
  38. HAS_INF,
  39. IS_OVERFLOW,
  40. MAX_GT,
  41. MAX_LT,
  42. MIN_GT,
  43. MIN_LT,
  44. MAX_MIN_GT,
  45. MAX_MIN_LT,
  46. MEAN_GT,
  47. MEAN_LT,
  48. SD_GT,
  49. SD_LT,
  50. GENERAL_OVERFLOW,
  51. INIT,
  52. TOO_LARGE,
  53. TOO_SMALL,
  54. ALL_ZERO,
  55. CHANGE_TOO_LARGE,
  56. CHANGE_TOO_SMALL,
  57. NOT_CHANGED
  58. };
  59. enum STAT_TYPE {
  60. STAT_MIN,
  61. STAT_MAX,
  62. STAT_MEAN,
  63. STAT_ZERO_PERCENTAGE,
  64. STAT_TENSOR_UPDATE_RATIO_MEAN,
  65. STAT_ALLCLOSE,
  66. STAT_ABS_MEAN
  67. };
  68. typedef struct condition {
  69. CONDITION_TYPE type;
  70. float parameter = 0;
  71. std::string comparison;
  72. } condition_t;
  73. typedef struct parameter {
  74. std::string name;
  75. bool disabled;
  76. double_t value;
  77. bool hit;
  78. } parameter_t;
  79. typedef struct watchpoint {
  80. unsigned int id;
  81. condition_t condition;
  82. std::vector<std::tuple<std::string, bool>> check_node_list;
  83. std::vector<parameter_t> parameter_list;
  84. size_t location = 0;
  85. bool IsNodeIncluded(const std::string &tensor_name) {
  86. std::string node_name = tensor_name.substr(0, tensor_name.find_first_of(':'));
  87. for (auto check_node : check_node_list) {
  88. std::string w_name = std::get<0>(check_node);
  89. bool w_type = std::get<1>(check_node);
  90. auto found = w_name.find_last_of('/');
  91. if (found != std::string::npos && w_name.substr(found + 1) == tensor_name) return true;
  92. if ((w_type && (tensor_name.find(w_name) == location || w_name == "*")) || (!w_type && node_name == w_name)) {
  93. return true;
  94. }
  95. }
  96. return false;
  97. }
  98. bool min_max_enabled() {
  99. return condition.type == MAX_LT || condition.type == MAX_GT || condition.type == MIN_LT ||
  100. condition.type == MIN_GT || condition.type == MAX_MIN_LT || condition.type == MAX_MIN_GT ||
  101. (condition.type == INIT && (!parameter_list[1].disabled || !parameter_list[2].disabled)) ||
  102. (condition.type == TOO_LARGE && (!parameter_list[1].disabled || !parameter_list[2].disabled)) ||
  103. (condition.type == TOO_SMALL && (!parameter_list[1].disabled || !parameter_list[2].disabled));
  104. }
  105. // inf or nan related condition set
  106. bool inf_nan_enabled() {
  107. return condition.type == HAS_INF || condition.type == HAS_NAN || condition.type == GENERAL_OVERFLOW;
  108. }
  109. // mean or sd related condition set
  110. bool mean_sd_enabled() {
  111. return condition.type == MEAN_LT || condition.type == MEAN_GT || condition.type == SD_LT ||
  112. condition.type == SD_GT || (condition.type == TOO_LARGE && !parameter_list[3].disabled) ||
  113. (condition.type == TOO_SMALL && !parameter_list[3].disabled);
  114. }
  115. bool abs_mean_enabled() {
  116. return (condition.type == TOO_LARGE && !parameter_list[0].disabled) ||
  117. (condition.type == TOO_SMALL && !parameter_list[0].disabled);
  118. }
  119. bool zero_percentage_enabled() { return condition.type == ALL_ZERO || condition.type == INIT; }
  120. bool tensor_update_ratio_mean_enabled() {
  121. return condition.type == CHANGE_TOO_LARGE || condition.type == CHANGE_TOO_SMALL;
  122. }
  123. bool allclose_enabled() { return condition.type == NOT_CHANGED; }
  124. } watchpoint_t;
  125. struct tensor_stats {
  126. double min = std::numeric_limits<double>::max();
  127. double max = std::numeric_limits<double>::lowest();
  128. bool has_inf = false;
  129. bool has_nan = false;
  130. unsigned int n = 0;
  131. double mean = 0.0;
  132. double m2 = 0.0;
  133. double zero_percentage = 0.0;
  134. double tensor_update_ratio_mean = -1;
  135. bool allclose = false;
  136. double abs_mean = 0.0;
  137. double statLookup(CONDITION_TYPE type) const {
  138. if (type == MAX_GT || type == MAX_LT) return max;
  139. if (type == MIN_GT || type == MIN_LT) return min;
  140. if (type == MAX_MIN_GT || type == MAX_MIN_LT) return (max - min);
  141. if (type == MEAN_GT || type == MEAN_LT) return mean;
  142. if (type == SD_GT || type == SD_LT) return getStandardDeviation();
  143. return std::numeric_limits<double>::quiet_NaN();
  144. }
  145. double parmLookup(STAT_TYPE type) const {
  146. if (type == STAT_MAX) return max;
  147. if (type == STAT_MIN) return min;
  148. if (type == STAT_MEAN) return mean;
  149. if (type == STAT_ZERO_PERCENTAGE) return zero_percentage;
  150. if (type == STAT_TENSOR_UPDATE_RATIO_MEAN) return tensor_update_ratio_mean;
  151. if (type == STAT_ALLCLOSE) return allclose;
  152. if (type == STAT_ABS_MEAN) return abs_mean;
  153. return std::numeric_limits<double>::quiet_NaN();
  154. }
  155. double getMean() const { return mean; }
  156. double getVariance() const {
  157. if (n > 1) {
  158. return m2 / (n - 1);
  159. } else {
  160. return 0.0;
  161. }
  162. }
  163. double getStandardDeviation() const { return sqrt(getVariance()); }
  164. };
  165. void AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter,
  166. const std::vector<std::tuple<std::string, bool>> &check_node_list,
  167. const std::vector<parameter_t> &parameter_list);
  168. void RemoveWatchpoint(unsigned int id);
  169. void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition,
  170. std::vector<unsigned int> *watchpoint_id, std::vector<std::vector<parameter_t>> *parameters,
  171. const std::vector<std::string> &op_overflows,
  172. const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend);
  173. void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
  174. std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
  175. std::vector<TypePtr> *dtype, std::vector<std::vector<int64_t>> *shape);
  176. bool IsWatchPoint(std::string kernel_name, const CNodePtr &kernel = nullptr);
  177. bool IsWatchPointNodeInput(std::string w_name, const CNodePtr &kernel);
  178. void AddWeightsBiasInputs(std::vector<std::shared_ptr<TensorData>> *tensor_list, const CNodePtr &kernel);
  179. TensorLoader *tensor_loader() const;
  180. std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable();
  181. private:
  182. std::mutex lock_;
  183. std::unordered_map<unsigned int, watchpoint_t> watchpoint_table;
  184. std::vector<std::string> condition_label = {
  185. "HAS_NAN", "HAS_INF", "IS_OVERFLOW", "MAX_GT", "MAX_LT",
  186. "MIN_GT", "MIN_LT", "MAX_MIN_GT", "MAX_MIN_LT", "MEAN_GT",
  187. "MEAN_LT", "SD_GT", "SD_LT", "GENERAL_OVERFLOW", "INIT",
  188. "TOO_LARGE", "TOO_SMALL", "ALL_ZERO", "CHANGE_TOO_LARGE", "CHANGE_TOO_SMALL",
  189. "NOT_CHANGED"};
  190. TensorLoader *tensor_loader_;
  191. template <typename T>
  192. static tensor_stats SummarizeTensor(const T *start, const T *start_prev, unsigned int n, bool need_min_max,
  193. bool need_mean_sd, bool need_zero_percentage, bool need_tensor_update_ratio_mean,
  194. bool need_allclose, bool need_abs_mean_sd);
  195. };
  196. } // namespace mindspore
  197. #endif // MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_