You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

tensor_summary.cc 12 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <cmath>
  17. #include <algorithm>
  18. #include <limits>
  19. #include <memory>
  20. #include <bitset>
  21. #include <tuple>
  22. #include <type_traits>
  23. #include "debug/debugger/tensor_summary.h"
  24. #ifdef OFFLINE_DBG_MODE
  25. #include "base/float16.h"
  26. #endif
  27. #ifdef ONLINE_DBG_MODE
  28. namespace mindspore {
  29. #endif
  30. using CONDITION_TYPE = DebugServices::CONDITION_TYPE;
  31. RangeCountCalculator::RangeCountCalculator()
  32. : range_start_inclusive(-std::numeric_limits<double>::infinity()),
  33. range_end_inclusive(std::numeric_limits<double>::infinity()),
  34. count(0),
  35. total(0) {}
  36. void RangeCountCalculator::ProcessElement(double element) {
  37. count += (element >= range_start_inclusive && element <= range_end_inclusive);
  38. total += 1;
  39. }
  40. double RangeCountCalculator::GetPercentInRange() const {
  41. if (total == 0) {
  42. return 0.0;
  43. }
  44. const double factor = 100.0;
  45. return factor * count / total;
  46. }
  47. AllCloseCalculator::AllCloseCalculator() : atol(1.0e-8), rtol(1.0e-5), result(true) {}
  48. void AllCloseCalculator::ProcessElement(double current, double previous) {
  49. result = result && (std::abs(current - previous) <= (atol + rtol * std::abs(previous)));
  50. }
  51. bool AllCloseCalculator::IsAllClose() const { return result; }
  52. MeanCalculator::MeanCalculator() : mean(0.0), count(0) {}
  53. void MeanCalculator::ProcessElement(double value) {
  54. count += 1;
  55. double delta = value - mean;
  56. mean += delta / count;
  57. }
  58. double MeanCalculator::GetMean() const { return mean; }
  59. VarianceAndMeanCalculator::VarianceAndMeanCalculator() : mean(0.0), count(0), m2(0.0) {}
  60. void VarianceAndMeanCalculator::ProcessElement(double value) {
  61. count += 1;
  62. double delta = value - mean;
  63. mean += delta / count;
  64. m2 += delta * (value - mean);
  65. }
  66. double VarianceAndMeanCalculator::GetMean() const { return mean; }
  67. double VarianceAndMeanCalculator::GetVariance() const {
  68. if (count > 1) {
  69. return m2 / (count - 1);
  70. }
  71. return 0.0;
  72. }
  73. double VarianceAndMeanCalculator::GetStandardDeviation() { return sqrt(GetVariance()); }
  74. template <typename T>
  75. TensorSummary<T>::TensorSummary(const void *current_tensor_ptr, const void *const previous_tensor_ptr,
  76. uint32_t num_elements, uint32_t prev_num_elements)
  77. : current_tensor_ptr_(reinterpret_cast<const T *>(current_tensor_ptr)),
  78. prev_tensor_ptr_(reinterpret_cast<const T *>(previous_tensor_ptr)),
  79. num_elements_(num_elements),
  80. prev_num_elements_(prev_num_elements),
  81. min_(std::numeric_limits<double>::max()),
  82. max_(std::numeric_limits<double>::lowest()),
  83. avg_(0.0),
  84. is_bool_(false),
  85. neg_zero_count_(0),
  86. pos_zero_count_(0),
  87. pos_inf_count_(0),
  88. neg_inf_count_(0),
  89. inf_count_(0),
  90. nan_count_(0),
  91. zero_count_(0),
  92. epsilon_(1.0e-9),
  93. mean_sd_cal_enabled_(false) {}
  94. template <typename T>
  95. void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &wps) {
  96. InitCalculators(wps);
  97. for (size_t i = 0; i < num_elements_; ++i) {
  98. auto current_value = static_cast<double>(current_tensor_ptr_[i]);
  99. double previous_value = std::numeric_limits<double>::quiet_NaN();
  100. if (prev_tensor_ptr_) {
  101. if (num_elements_ == prev_num_elements_) {
  102. previous_value = static_cast<double>(prev_tensor_ptr_[i]);
  103. } else {
  104. MS_LOG(DEBUG) << "Current and previous tensor are not the same size.";
  105. }
  106. }
  107. if (std::isinf(current_value)) {
  108. inf_count_ += 1;
  109. }
  110. if (std::isnan(current_value)) {
  111. nan_count_ += 1;
  112. }
  113. if (current_value == 0) {
  114. zero_count_ += 1;
  115. }
  116. max_ = std::max(max_, current_value);
  117. min_ = std::min(min_, current_value);
  118. if (mean_sd_cal_enabled_) {
  119. current_mean_variance_.ProcessElement(current_value);
  120. }
  121. for (auto &it : all_close_) {
  122. it.second->ProcessElement(current_value, previous_value);
  123. }
  124. for (auto &range_count : range_counts_) {
  125. range_count.second->ProcessElement(current_value);
  126. }
  127. for (auto &mean : means_) {
  128. if (mean.first.compare("curr_prev_diff_mean") == 0) {
  129. mean.second->ProcessElement(std::abs(current_value - previous_value));
  130. } else if (mean.first.compare("abs_prev_mean") == 0) {
  131. mean.second->ProcessElement(std::abs(previous_value));
  132. } else if (mean.first.compare("abs_current_mean") == 0) {
  133. mean.second->ProcessElement(std::abs(current_value));
  134. }
  135. }
  136. }
  137. }
  138. template <typename T>
  139. void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
  140. if (dtype_value == DT_BOOL) {
  141. is_bool_ = true;
  142. }
  143. double sum_elements = 0.0;
  144. for (size_t i = 0; i < num_elements_; ++i) {
  145. auto current_value = static_cast<double>(current_tensor_ptr_[i]);
  146. if (std::isinf(current_value)) {
  147. if (current_value > 0) {
  148. pos_inf_count_ += 1;
  149. } else {
  150. neg_inf_count_ += 1;
  151. }
  152. }
  153. if (current_value == 0) {
  154. zero_count_ += 1;
  155. }
  156. if (std::isnan(current_value)) {
  157. nan_count_ += 1;
  158. }
  159. if (!(std::isnan(current_value) || std::isinf(current_value))) {
  160. // only considering tensor elements with value
  161. if (std::signbit(current_value) && !(current_value == 0)) {
  162. neg_zero_count_ += 1;
  163. } else if (!(current_value == 0)) {
  164. pos_zero_count_ += 1;
  165. }
  166. max_ = std::max(max_, current_value);
  167. min_ = std::min(min_, current_value);
  168. sum_elements += current_value;
  169. }
  170. }
  171. unsigned int value_count = zero_count_ + neg_zero_count_ + pos_zero_count_;
  172. avg_ = sum_elements / value_count;
  173. }
  174. template <typename T>
  175. std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>::IsWatchpointHit(
  176. DebugServices::watchpoint_t wp) {
  177. auto parameter_list = wp.parameter_list;
  178. bool hit = false;
  179. const uint8_t bit_size = 32;
  180. std::bitset<bit_size> error_code;
  181. CONDITION_TYPE type = wp.condition.type;
  182. // bit 0 denotes presence of nan
  183. (void)error_code.set(0, nan_count_ > 0);
  184. // bit 1 denotes presence of inf
  185. (void)error_code.set(1, inf_count_ > 0);
  186. if (type == CONDITION_TYPE::HAS_NAN) {
  187. error_code.reset();
  188. hit = nan_count_ > 0;
  189. } else if (type == CONDITION_TYPE::HAS_INF) {
  190. error_code.reset();
  191. hit = inf_count_ > 0;
  192. } else if (type == CONDITION_TYPE::GENERAL_OVERFLOW) {
  193. error_code.reset();
  194. hit = (nan_count_ + inf_count_) > 0;
  195. } else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr_ && error_code.none()) {
  196. hit = all_close_[wp.id]->IsAllClose();
  197. } else if ((type == CONDITION_TYPE::NOT_CHANGED || type == CONDITION_TYPE::CHANGE_TOO_LARGE ||
  198. type == CONDITION_TYPE::CHANGE_TOO_SMALL) &&
  199. !prev_tensor_ptr_) {
  200. // bit 2 denotes absence of previous tensor
  201. error_code.set(2, true);
  202. }
  203. if (error_code.none()) {
  204. for (auto &parameter : parameter_list) {
  205. if (parameter.disabled || error_code.any()) {
  206. continue;
  207. }
  208. // extract inequality type from watchpoint for backward compatibility
  209. std::string inequality_type;
  210. if (wp.is_gt_wp()) {
  211. inequality_type = "gt";
  212. } else if (wp.is_lt_wp()) {
  213. inequality_type = "lt";
  214. }
  215. parameter.Evaluate(StatLookup(parameter.name, wp), inequality_type);
  216. hit = hit || parameter.hit;
  217. }
  218. }
  219. return std::make_tuple(hit, static_cast<int32_t>(error_code.to_ulong()), parameter_list);
  220. }
  221. template <typename T>
  222. double_t TensorSummary<T>::StatLookup(const std::string &parameter_name, const DebugServices::watchpoint_t &wp) {
  223. if (parameter_name == "param") return StatLookup(wp);
  224. std::string param_type;
  225. auto pos = parameter_name.find_last_of('_');
  226. if (pos != std::string::npos) {
  227. param_type = parameter_name.substr(0, pos);
  228. }
  229. if (param_type == "max") {
  230. return max_;
  231. }
  232. if (param_type == "min") {
  233. return min_;
  234. }
  235. if (param_type == "max_min") {
  236. return max_ - min_;
  237. }
  238. if (param_type == "mean") {
  239. return current_mean_variance_.GetMean();
  240. }
  241. if (param_type == "sd") {
  242. return current_mean_variance_.GetStandardDeviation();
  243. }
  244. if (param_type == "abs_mean") {
  245. if (means_.find("abs_current_mean") != means_.end()) {
  246. return means_["abs_current_mean"]->GetMean();
  247. }
  248. }
  249. if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr_) {
  250. if (means_.find("curr_prev_diff_mean") != means_.end() && means_.find("abs_prev_mean") != means_.end()) {
  251. return means_["curr_prev_diff_mean"]->GetMean() / (means_["abs_prev_mean"]->GetMean() + epsilon_);
  252. }
  253. }
  254. if (param_type == "range_percentage") {
  255. if (range_counts_.find(wp.id) != range_counts_.end()) {
  256. return range_counts_[wp.id]->GetPercentInRange();
  257. }
  258. }
  259. if (param_type == "zero_percentage") {
  260. return GetZeroValPercent();
  261. }
  262. return std::numeric_limits<double_t>::quiet_NaN();
  263. }
  264. template <typename T>
  265. double_t TensorSummary<T>::StatLookup(const DebugServices::watchpoint_t &wp) {
  266. CONDITION_TYPE type = wp.condition.type;
  267. if (type == CONDITION_TYPE::MAX_LT || type == CONDITION_TYPE::MAX_GT) {
  268. return max_;
  269. }
  270. if (type == CONDITION_TYPE::MIN_LT || type == CONDITION_TYPE::MIN_GT) {
  271. return min_;
  272. }
  273. if (type == CONDITION_TYPE::MEAN_LT || type == CONDITION_TYPE::MEAN_GT) {
  274. return current_mean_variance_.GetMean();
  275. }
  276. if (type == CONDITION_TYPE::SD_LT || type == CONDITION_TYPE::SD_GT) {
  277. return current_mean_variance_.GetStandardDeviation();
  278. }
  279. if (type == CONDITION_TYPE::MAX_MIN_GT || type == CONDITION_TYPE::MAX_MIN_LT) {
  280. return max_ - min_;
  281. }
  282. return std::numeric_limits<double_t>::quiet_NaN();
  283. }
  284. template <typename T>
  285. double_t TensorSummary<T>::GetZeroValPercent() {
  286. if (num_elements_ == 0) {
  287. return 0;
  288. }
  289. return (zero_count_ * 100.0) / num_elements_;
  290. }
  291. template <typename T>
  292. void TensorSummary<T>::InitCalculators(const std::vector<DebugServices::watchpoint_t> &wps) {
  293. for (auto &wp : wps) {
  294. auto wp_id = wp.id;
  295. mean_sd_cal_enabled_ = mean_sd_cal_enabled_ || wp.mean_sd_enabled();
  296. if (wp.allclose_enabled() && prev_tensor_ptr_) {
  297. all_close_[wp_id] = std::make_unique<AllCloseCalculator>();
  298. if (!wp.parameter_list[0].disabled) {
  299. all_close_[wp_id]->set_atol(wp.parameter_list[0].value);
  300. }
  301. if (!wp.parameter_list[1].disabled) {
  302. all_close_[wp_id]->set_rtol(wp.parameter_list[1].value);
  303. }
  304. } else if (wp.range_enabled()) {
  305. range_counts_[wp_id] = std::make_unique<RangeCountCalculator>();
  306. if (!wp.parameter_list[0].disabled) {
  307. range_counts_[wp_id]->set_range_start_inclusive(wp.parameter_list[0].value);
  308. }
  309. if (!wp.parameter_list[1].disabled) {
  310. range_counts_[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
  311. }
  312. } else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr_) {
  313. (void)means_.emplace("curr_prev_diff_mean", std::make_unique<MeanCalculator>());
  314. (void)means_.emplace("abs_prev_mean", std::make_unique<MeanCalculator>());
  315. } else if (wp.abs_mean_enabled()) {
  316. (void)means_.emplace("abs_current_mean", std::make_unique<MeanCalculator>());
  317. }
  318. }
  319. }
  320. template class TensorSummary<uint8_t>;
  321. template class TensorSummary<int8_t>;
  322. template class TensorSummary<uint16_t>;
  323. template class TensorSummary<int16_t>;
  324. template class TensorSummary<uint32_t>;
  325. template class TensorSummary<int32_t>;
  326. template class TensorSummary<uint64_t>;
  327. template class TensorSummary<int64_t>;
  328. template class TensorSummary<float16>;
  329. template class TensorSummary<float>;
  330. template class TensorSummary<double>;
  331. template class TensorSummary<bool>;
  332. #ifdef ONLINE_DBG_MODE
  333. } // namespace mindspore
  334. #endif