You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

tensor_summary.cc 16 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. /**
  2. * Copyright 2020-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <cmath>
  17. #include <algorithm>
  18. #include <future>
  19. #include <limits>
  20. #include <memory>
  21. #include <bitset>
  22. #include <tuple>
  23. #include <type_traits>
  24. #include "debug/debugger/tensor_summary.h"
  25. #ifdef OFFLINE_DBG_MODE
  26. #include "base/float16.h"
  27. #endif
  28. namespace mindspore {
  29. using CONDITION_TYPE = DebugServices::CONDITION_TYPE;
  30. RangeCountCalculator::RangeCountCalculator()
  31. : range_start_inclusive(-std::numeric_limits<double>::infinity()),
  32. range_end_inclusive(std::numeric_limits<double>::infinity()),
  33. count(0),
  34. total(0) {}
  35. void RangeCountCalculator::ProcessElement(double element) {
  36. count += (element >= range_start_inclusive && element <= range_end_inclusive);
  37. total += 1;
  38. }
  39. double RangeCountCalculator::GetPercentInRange() const {
  40. if (total == 0) {
  41. return 0.0;
  42. }
  43. const double factor = 100.0;
  44. return factor * count / total;
  45. }
  46. AllCloseCalculator::AllCloseCalculator() : atol(1.0e-8), rtol(1.0e-5), result(true) {}
  47. void AllCloseCalculator::ProcessElement(double current, double previous) {
  48. result = result && (std::abs(current - previous) <= (atol + rtol * std::abs(previous)));
  49. }
  50. bool AllCloseCalculator::IsAllClose() const { return result; }
  51. MeanCalculator::MeanCalculator() : mean(0.0), count(0) {}
  52. void MeanCalculator::ProcessElement(double value) {
  53. count += 1;
  54. double delta = value - mean;
  55. mean += delta / count;
  56. }
  57. double MeanCalculator::GetMean() const { return mean; }
  58. VarianceAndMeanCalculator::VarianceAndMeanCalculator() : mean(0.0), count(0), m2(0.0) {}
  59. void VarianceAndMeanCalculator::ProcessElement(double value) {
  60. count += 1;
  61. double delta = value - mean;
  62. mean += delta / count;
  63. m2 += delta * (value - mean);
  64. }
  65. double VarianceAndMeanCalculator::GetMean() const { return mean; }
  66. double VarianceAndMeanCalculator::GetVariance() const {
  67. if (count > 1) {
  68. return m2 / (count - 1);
  69. }
  70. return 0.0;
  71. }
  72. double VarianceAndMeanCalculator::GetStandardDeviation() { return sqrt(GetVariance()); }
/*
 * Builds a summary over a raw tensor buffer (and optionally its previous snapshot).
 * The void pointers are reinterpreted as const T*; ownership stays with the caller.
 * NOTE(review): assumes current_tensor_ptr addresses num_elements values of type T
 * and previous_tensor_ptr (when non-null) addresses prev_num_elements values —
 * confirm at call sites, nothing here validates it.
 */
template <typename T>
TensorSummary<T>::TensorSummary(const void *current_tensor_ptr, const void *const previous_tensor_ptr,
                                uint64_t num_elements, uint64_t prev_num_elements)
    : current_tensor_ptr_(reinterpret_cast<const T *>(current_tensor_ptr)),
      prev_tensor_ptr_(reinterpret_cast<const T *>(previous_tensor_ptr)),
      num_elements_(num_elements),
      prev_num_elements_(prev_num_elements),
      min_(std::numeric_limits<double>::max()),     // identity for running min
      max_(std::numeric_limits<double>::lowest()),  // identity for running max
      avg_(0.0),
      is_bool_(false),
      neg_zero_count_(0),
      pos_zero_count_(0),
      pos_inf_count_(0),
      neg_inf_count_(0),
      inf_count_(0),
      nan_count_(0),
      zero_count_(0),
      epsilon_(1.0e-9),  // guards the divide in the mean-update-ratio statistic
      mean_sd_cal_enabled_(false) {}
  93. /*
  94. * Feature group: Online debugger, Offline debugger.
  95. * Target device group: Ascend, GPU.
  96. * Runtime category: Old runtime, MindRT.
  97. * Description: Initialize watchpoints calculators based on the watchpoint category. Process all the elements within the
  98. * current tensor.
  99. */
  100. template <typename T>
  101. void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &wps) {
  102. InitCalculators(wps);
  103. for (size_t i = 0; i < num_elements_; ++i) {
  104. auto current_value = static_cast<double>(current_tensor_ptr_[i]);
  105. double previous_value = std::numeric_limits<double>::quiet_NaN();
  106. if (prev_tensor_ptr_) {
  107. if (num_elements_ == prev_num_elements_) {
  108. previous_value = static_cast<double>(prev_tensor_ptr_[i]);
  109. } else {
  110. MS_LOG(DEBUG) << "Current and previous tensor are not the same size.";
  111. }
  112. }
  113. if (std::isinf(current_value)) {
  114. inf_count_ += 1;
  115. }
  116. if (std::isnan(current_value)) {
  117. nan_count_ += 1;
  118. }
  119. if (current_value == 0) {
  120. zero_count_ += 1;
  121. }
  122. max_ = std::max(max_, current_value);
  123. min_ = std::min(min_, current_value);
  124. if (mean_sd_cal_enabled_) {
  125. current_mean_variance_.ProcessElement(current_value);
  126. }
  127. for (auto &it : all_close_) {
  128. it.second->ProcessElement(current_value, previous_value);
  129. }
  130. for (auto &range_count : range_counts_) {
  131. range_count.second->ProcessElement(current_value);
  132. }
  133. for (auto &mean : means_) {
  134. if (mean.first.compare("curr_prev_diff_mean") == 0) {
  135. mean.second->ProcessElement(std::abs(current_value - previous_value));
  136. } else if (mean.first.compare("abs_prev_mean") == 0) {
  137. mean.second->ProcessElement(std::abs(previous_value));
  138. } else if (mean.first.compare("abs_current_mean") == 0) {
  139. mean.second->ProcessElement(std::abs(current_value));
  140. }
  141. }
  142. }
  143. }
  144. /*
  145. * Feature group: Online debugger, Offline debugger.
  146. * Target device group: Ascend, GPU.
  147. * Runtime category: Old runtime, MindRT.
  148. * Description: Calculates statistics on chunks of data.
  149. */
  150. template <typename T>
  151. void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
  152. if (dtype_value == DT_BOOL) {
  153. is_bool_ = true;
  154. }
  155. const uint64_t default_threads = 32;
  156. const uint64_t default_elements_per_thread = 10000;
  157. if (num_elements_ <= default_elements_per_thread) {
  158. return TensorStatisticsSingleThread();
  159. }
  160. uint64_t desired_threads = num_elements_ / default_elements_per_thread;
  161. uint64_t actual_threads = std::min(desired_threads, default_threads);
  162. uint64_t actual_elements_per_thread = num_elements_ / actual_threads;
  163. // Use multithread to calculate statistic on chunks of data
  164. void *previous_tensor_ptr = nullptr;
  165. size_t offset = 0;
  166. std::vector<std::unique_ptr<TensorSummary<T>>> summary_vec;
  167. std::vector<std::future<void>> summary_future_vec;
  168. for (uint64_t i = 0; i < actual_threads; i++) {
  169. uint64_t num_elements_for_thread;
  170. if (i == actual_threads - 1) {
  171. num_elements_for_thread = num_elements_ - offset;
  172. } else {
  173. num_elements_for_thread = actual_elements_per_thread;
  174. }
  175. (void)summary_vec.emplace_back(std::make_unique<TensorSummary<T>>(current_tensor_ptr_ + offset, previous_tensor_ptr,
  176. num_elements_for_thread, 0));
  177. (void)summary_future_vec.emplace_back(
  178. std::async(std::launch::async, &TensorSummary<T>::TensorStatisticsSingleThread, summary_vec[i].get()));
  179. offset += num_elements_for_thread;
  180. }
  181. // Aggregate results of all chunks
  182. num_elements_ = 0; // Let current tensor weight 0 in the aggregation
  183. for (unsigned int i = 0; i < summary_future_vec.size(); i++) {
  184. summary_future_vec[i].wait();
  185. summary_future_vec[i].get();
  186. auto &cur_summary = *(summary_vec[i]);
  187. num_elements_ += cur_summary.num_elements_;
  188. min_ = std::min(min_, cur_summary.min_);
  189. max_ = std::max(max_, cur_summary.max_);
  190. double avg_delta = cur_summary.avg_ - avg_;
  191. avg_ += avg_delta * (cur_summary.num_elements_ / num_elements_);
  192. neg_zero_count_ += cur_summary.neg_zero_count_;
  193. pos_zero_count_ += cur_summary.pos_zero_count_;
  194. neg_inf_count_ += cur_summary.neg_inf_count_;
  195. pos_inf_count_ += cur_summary.pos_inf_count_;
  196. inf_count_ += cur_summary.inf_count_;
  197. nan_count_ += cur_summary.nan_count_;
  198. zero_count_ += cur_summary.zero_count_;
  199. }
  200. }
  201. /*
  202. * Feature group: Online debugger, Offline debugger.
  203. * Target device group: Ascend, GPU.
  204. * Runtime category: Old runtime, MindRT.
  205. * Description: Process all the elements of the chunked data and calculates the statistics.
  206. */
  207. template <typename T>
  208. void TensorSummary<T>::TensorStatisticsSingleThread() {
  209. MeanCalculator mean_calc = MeanCalculator();
  210. for (size_t i = 0; i < num_elements_; ++i) {
  211. auto current_value = static_cast<double>(current_tensor_ptr_[i]);
  212. if (std::isinf(current_value)) {
  213. if (current_value > 0) {
  214. pos_inf_count_ += 1;
  215. } else {
  216. neg_inf_count_ += 1;
  217. }
  218. }
  219. if (current_value == 0) {
  220. zero_count_ += 1;
  221. }
  222. if (std::isnan(current_value)) {
  223. nan_count_ += 1;
  224. }
  225. if (!(std::isnan(current_value) || std::isinf(current_value))) {
  226. // only considering tensor elements with value
  227. if (std::signbit(current_value) && !(current_value == 0)) {
  228. neg_zero_count_ += 1;
  229. } else if (!(current_value == 0)) {
  230. pos_zero_count_ += 1;
  231. }
  232. max_ = std::max(max_, current_value);
  233. min_ = std::min(min_, current_value);
  234. mean_calc.ProcessElement(current_value);
  235. }
  236. }
  237. avg_ = mean_calc.GetMean();
  238. }
/*
 * Feature group: Online debugger, Offline debugger.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: Returns a tuple with three elements, the first element is a bool and it is true if the watchpoint is
 * hit. The second element is the error_code which is set in this function and the third element is the parameter_list
 * for the watchpoint (with each parameter's evaluation result written into it).
 */
template <typename T>
std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>::IsWatchpointHit(
  DebugServices::watchpoint_t wp) {
  // Local copy returned to the caller; Evaluate() below mutates its entries.
  auto parameter_list = wp.parameter_list;
  bool hit = false;
  const uint8_t bit_size = 32;
  std::bitset<bit_size> error_code;
  CONDITION_TYPE type = wp.condition.type;
  // bit 0 denotes presence of nan
  (void)error_code.set(0, nan_count_ > 0);
  // bit 1 denotes presence of inf
  (void)error_code.set(1, inf_count_ > 0);
  if (type == CONDITION_TYPE::HAS_NAN) {
    // For NaN/Inf conditions the anomaly IS the condition, not an error: clear the code.
    error_code.reset();
    hit = nan_count_ > 0;
  } else if (type == CONDITION_TYPE::HAS_INF) {
    error_code.reset();
    hit = inf_count_ > 0;
  } else if (type == CONDITION_TYPE::GENERAL_OVERFLOW) {
    error_code.reset();
    hit = (nan_count_ + inf_count_) > 0;
  } else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr_ && error_code.none()) {
    // NOT_CHANGED needs a clean tensor and a previous snapshot; uses the allclose verdict.
    hit = all_close_[wp.id]->IsAllClose();
  } else if ((type == CONDITION_TYPE::NOT_CHANGED || type == CONDITION_TYPE::CHANGE_TOO_LARGE ||
              type == CONDITION_TYPE::CHANGE_TOO_SMALL) &&
             !prev_tensor_ptr_) {
    // bit 2 denotes absence of previous tensor
    error_code.set(2, true);
  }
  // Parameters are only evaluated when no error bit is set.
  if (error_code.none()) {
    for (auto &parameter : parameter_list) {
      // NOTE(review): error_code is not modified inside this loop, so the
      // `error_code.any()` half of this guard is always false here — dead code.
      if (parameter.disabled || error_code.any()) {
        continue;
      }
      // extract inequality type from watchpoint for backward compatibility
      std::string inequality_type;
      if (wp.is_gt_wp()) {
        inequality_type = "gt";
      } else if (wp.is_lt_wp()) {
        inequality_type = "lt";
      }
      // Compare the requested statistic against the parameter's threshold.
      parameter.Evaluate(StatLookup(parameter.name, wp), inequality_type);
      hit = hit || parameter.hit;
    }
  }
  return std::make_tuple(hit, static_cast<int32_t>(error_code.to_ulong()), parameter_list);
}
  294. template <typename T>
  295. double_t TensorSummary<T>::StatLookup(const std::string &parameter_name, const DebugServices::watchpoint_t &wp) {
  296. if (parameter_name == "param") return StatLookup(wp);
  297. std::string param_type;
  298. auto pos = parameter_name.find_last_of('_');
  299. if (pos != std::string::npos) {
  300. param_type = parameter_name.substr(0, pos);
  301. }
  302. if (param_type == "max") {
  303. return max_;
  304. }
  305. if (param_type == "min") {
  306. return min_;
  307. }
  308. if (param_type == "max_min") {
  309. return max_ - min_;
  310. }
  311. if (param_type == "mean") {
  312. return current_mean_variance_.GetMean();
  313. }
  314. if (param_type == "sd") {
  315. return current_mean_variance_.GetStandardDeviation();
  316. }
  317. if (param_type == "abs_mean") {
  318. if (means_.find("abs_current_mean") != means_.end()) {
  319. return means_["abs_current_mean"]->GetMean();
  320. }
  321. }
  322. if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr_) {
  323. if (means_.find("curr_prev_diff_mean") != means_.end() && means_.find("abs_prev_mean") != means_.end()) {
  324. return means_["curr_prev_diff_mean"]->GetMean() / (means_["abs_prev_mean"]->GetMean() + epsilon_);
  325. }
  326. }
  327. if (param_type == "range_percentage") {
  328. if (range_counts_.find(wp.id) != range_counts_.end()) {
  329. return range_counts_[wp.id]->GetPercentInRange();
  330. }
  331. }
  332. if (param_type == "zero_percentage") {
  333. return GetZeroValPercent();
  334. }
  335. return std::numeric_limits<double_t>::quiet_NaN();
  336. }
  337. template <typename T>
  338. double_t TensorSummary<T>::StatLookup(const DebugServices::watchpoint_t &wp) {
  339. CONDITION_TYPE type = wp.condition.type;
  340. if (type == CONDITION_TYPE::MAX_LT || type == CONDITION_TYPE::MAX_GT) {
  341. return max_;
  342. }
  343. if (type == CONDITION_TYPE::MIN_LT || type == CONDITION_TYPE::MIN_GT) {
  344. return min_;
  345. }
  346. if (type == CONDITION_TYPE::MEAN_LT || type == CONDITION_TYPE::MEAN_GT) {
  347. return current_mean_variance_.GetMean();
  348. }
  349. if (type == CONDITION_TYPE::SD_LT || type == CONDITION_TYPE::SD_GT) {
  350. return current_mean_variance_.GetStandardDeviation();
  351. }
  352. if (type == CONDITION_TYPE::MAX_MIN_GT || type == CONDITION_TYPE::MAX_MIN_LT) {
  353. return max_ - min_;
  354. }
  355. return std::numeric_limits<double_t>::quiet_NaN();
  356. }
  357. template <typename T>
  358. double_t TensorSummary<T>::GetZeroValPercent() {
  359. if (num_elements_ == 0) {
  360. return 0;
  361. }
  362. return (zero_count_ * 100.0) / num_elements_;
  363. }
  364. template <typename T>
  365. void TensorSummary<T>::InitCalculators(const std::vector<DebugServices::watchpoint_t> &wps) {
  366. for (auto &wp : wps) {
  367. auto wp_id = wp.id;
  368. mean_sd_cal_enabled_ = mean_sd_cal_enabled_ || wp.mean_sd_enabled();
  369. if (wp.allclose_enabled() && prev_tensor_ptr_) {
  370. all_close_[wp_id] = std::make_unique<AllCloseCalculator>();
  371. if (!wp.parameter_list[0].disabled) {
  372. all_close_[wp_id]->set_rtol(wp.parameter_list[0].value);
  373. }
  374. if (!wp.parameter_list[1].disabled) {
  375. all_close_[wp_id]->set_atol(wp.parameter_list[1].value);
  376. }
  377. } else if (wp.range_enabled()) {
  378. range_counts_[wp_id] = std::make_unique<RangeCountCalculator>();
  379. if (!wp.parameter_list[0].disabled) {
  380. range_counts_[wp_id]->set_range_start_inclusive(wp.parameter_list[0].value);
  381. }
  382. if (!wp.parameter_list[1].disabled) {
  383. range_counts_[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
  384. }
  385. } else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr_) {
  386. (void)means_.emplace("curr_prev_diff_mean", std::make_unique<MeanCalculator>());
  387. (void)means_.emplace("abs_prev_mean", std::make_unique<MeanCalculator>());
  388. } else if (wp.abs_mean_enabled()) {
  389. (void)means_.emplace("abs_current_mean", std::make_unique<MeanCalculator>());
  390. }
  391. }
  392. }
// Explicit instantiations for every element dtype the debugger can summarize;
// keeps the template definitions private to this translation unit.
template class TensorSummary<uint8_t>;
template class TensorSummary<int8_t>;
template class TensorSummary<uint16_t>;
template class TensorSummary<int16_t>;
template class TensorSummary<uint32_t>;
template class TensorSummary<int32_t>;
template class TensorSummary<uint64_t>;
template class TensorSummary<int64_t>;
template class TensorSummary<float16>;
template class TensorSummary<float>;
template class TensorSummary<double>;
template class TensorSummary<bool>;
}  // namespace mindspore