|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363 |
- /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include <cmath>
- #include <algorithm>
- #include <limits>
- #include <memory>
- #include <bitset>
- #include <tuple>
- #include <type_traits>
- #include "debug/debugger/tensor_summary.h"
-
- #ifdef OFFLINE_DBG_MODE
- #include "base/float16.h"
- #endif
-
- #ifdef ONLINE_DBG_MODE
- namespace mindspore {
- #endif
- using CONDITION_TYPE = DebugServices::CONDITION_TYPE;
-
- RangeCountCalculator::RangeCountCalculator()
- : range_start_inclusive(-std::numeric_limits<double>::infinity()),
- range_end_inclusive(std::numeric_limits<double>::infinity()),
- count(0),
- total(0) {}
-
- void RangeCountCalculator::ProcessElement(double element) {
- count += (element >= range_start_inclusive && element <= range_end_inclusive);
- total += 1;
- }
-
- double RangeCountCalculator::GetPercentInRange() const {
- if (total == 0) {
- return 0.0;
- }
- const double factor = 100.0;
- return factor * count / total;
- }
-
- AllCloseCalculator::AllCloseCalculator() : atol(1.0e-8), rtol(1.0e-5), result(true) {}
-
- void AllCloseCalculator::ProcessElement(double current, double previous) {
- result = result && (std::abs(current - previous) <= (atol + rtol * std::abs(previous)));
- }
-
- bool AllCloseCalculator::IsAllClose() const { return result; }
-
- MeanCalculator::MeanCalculator() : mean(0.0), count(0) {}
-
- void MeanCalculator::ProcessElement(double value) {
- count += 1;
- double delta = value - mean;
- mean += delta / count;
- }
-
- double MeanCalculator::GetMean() const { return mean; }
-
- VarianceAndMeanCalculator::VarianceAndMeanCalculator() : mean(0.0), count(0), m2(0.0) {}
-
- void VarianceAndMeanCalculator::ProcessElement(double value) {
- count += 1;
- double delta = value - mean;
- mean += delta / count;
- m2 += delta * (value - mean);
- }
-
- double VarianceAndMeanCalculator::GetMean() const { return mean; }
-
- double VarianceAndMeanCalculator::GetVariance() const {
- if (count > 1) {
- return m2 / (count - 1);
- }
- return 0.0;
- }
-
- double VarianceAndMeanCalculator::GetStandardDeviation() { return sqrt(GetVariance()); }
-
- template <typename T>
- TensorSummary<T>::TensorSummary(const void *current_tensor_ptr, const void *const previous_tensor_ptr,
- uint32_t num_elements, uint32_t prev_num_elements)
- : current_tensor_ptr_(reinterpret_cast<const T *>(current_tensor_ptr)),
- prev_tensor_ptr_(reinterpret_cast<const T *>(previous_tensor_ptr)),
- num_elements_(num_elements),
- prev_num_elements_(prev_num_elements),
- min_(std::numeric_limits<double>::max()),
- max_(std::numeric_limits<double>::lowest()),
- avg_(0.0),
- is_bool_(false),
- neg_zero_count_(0),
- pos_zero_count_(0),
- pos_inf_count_(0),
- neg_inf_count_(0),
- inf_count_(0),
- nan_count_(0),
- zero_count_(0),
- epsilon_(1.0e-9),
- mean_sd_cal_enabled_(false) {}
-
- template <typename T>
- void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &wps) {
- InitCalculators(wps);
- for (size_t i = 0; i < num_elements_; ++i) {
- auto current_value = static_cast<double>(current_tensor_ptr_[i]);
- double previous_value = std::numeric_limits<double>::quiet_NaN();
- if (prev_tensor_ptr_) {
- if (num_elements_ == prev_num_elements_) {
- previous_value = static_cast<double>(prev_tensor_ptr_[i]);
- } else {
- MS_LOG(DEBUG) << "Current and previous tensor are not the same size.";
- }
- }
- if (std::isinf(current_value)) {
- inf_count_ += 1;
- }
- if (std::isnan(current_value)) {
- nan_count_ += 1;
- }
- if (current_value == 0) {
- zero_count_ += 1;
- }
- max_ = std::max(max_, current_value);
- min_ = std::min(min_, current_value);
- if (mean_sd_cal_enabled_) {
- current_mean_variance_.ProcessElement(current_value);
- }
- for (auto &it : all_close_) {
- it.second->ProcessElement(current_value, previous_value);
- }
- for (auto &range_count : range_counts_) {
- range_count.second->ProcessElement(current_value);
- }
- for (auto &mean : means_) {
- if (mean.first.compare("curr_prev_diff_mean") == 0) {
- mean.second->ProcessElement(std::abs(current_value - previous_value));
- } else if (mean.first.compare("abs_prev_mean") == 0) {
- mean.second->ProcessElement(std::abs(previous_value));
- } else if (mean.first.compare("abs_current_mean") == 0) {
- mean.second->ProcessElement(std::abs(current_value));
- }
- }
- }
- }
-
- template <typename T>
- void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
- if (dtype_value == DT_BOOL) {
- is_bool_ = true;
- }
- double sum_elements = 0.0;
- for (size_t i = 0; i < num_elements_; ++i) {
- auto current_value = static_cast<double>(current_tensor_ptr_[i]);
- if (std::isinf(current_value)) {
- if (current_value > 0) {
- pos_inf_count_ += 1;
- } else {
- neg_inf_count_ += 1;
- }
- }
- if (current_value == 0) {
- zero_count_ += 1;
- }
- if (std::isnan(current_value)) {
- nan_count_ += 1;
- }
- if (!(std::isnan(current_value) || std::isinf(current_value))) {
- // only considering tensor elements with value
- if (std::signbit(current_value) && !(current_value == 0)) {
- neg_zero_count_ += 1;
- } else if (!(current_value == 0)) {
- pos_zero_count_ += 1;
- }
- max_ = std::max(max_, current_value);
- min_ = std::min(min_, current_value);
- sum_elements += current_value;
- }
- }
- unsigned int value_count = zero_count_ + neg_zero_count_ + pos_zero_count_;
- avg_ = sum_elements / value_count;
- }
-
- template <typename T>
- std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>::IsWatchpointHit(
- DebugServices::watchpoint_t wp) {
- auto parameter_list = wp.parameter_list;
- bool hit = false;
- const uint8_t bit_size = 32;
- std::bitset<bit_size> error_code;
- CONDITION_TYPE type = wp.condition.type;
- // bit 0 denotes presence of nan
- (void)error_code.set(0, nan_count_ > 0);
- // bit 1 denotes presence of inf
- (void)error_code.set(1, inf_count_ > 0);
-
- if (type == CONDITION_TYPE::HAS_NAN) {
- error_code.reset();
- hit = nan_count_ > 0;
- } else if (type == CONDITION_TYPE::HAS_INF) {
- error_code.reset();
- hit = inf_count_ > 0;
- } else if (type == CONDITION_TYPE::GENERAL_OVERFLOW) {
- error_code.reset();
- hit = (nan_count_ + inf_count_) > 0;
- } else if (type == CONDITION_TYPE::NOT_CHANGED && prev_tensor_ptr_ && error_code.none()) {
- hit = all_close_[wp.id]->IsAllClose();
- } else if ((type == CONDITION_TYPE::NOT_CHANGED || type == CONDITION_TYPE::CHANGE_TOO_LARGE ||
- type == CONDITION_TYPE::CHANGE_TOO_SMALL) &&
- !prev_tensor_ptr_) {
- // bit 2 denotes absence of previous tensor
- error_code.set(2, true);
- }
-
- if (error_code.none()) {
- for (auto ¶meter : parameter_list) {
- if (parameter.disabled || error_code.any()) {
- continue;
- }
- // extract inequality type from watchpoint for backward compatibility
- std::string inequality_type;
- if (wp.is_gt_wp()) {
- inequality_type = "gt";
- } else if (wp.is_lt_wp()) {
- inequality_type = "lt";
- }
- parameter.Evaluate(StatLookup(parameter.name, wp), inequality_type);
- hit = hit || parameter.hit;
- }
- }
- return std::make_tuple(hit, static_cast<int32_t>(error_code.to_ulong()), parameter_list);
- }
-
- template <typename T>
- double_t TensorSummary<T>::StatLookup(const std::string ¶meter_name, const DebugServices::watchpoint_t &wp) {
- if (parameter_name == "param") return StatLookup(wp);
- std::string param_type;
- auto pos = parameter_name.find_last_of('_');
- if (pos != std::string::npos) {
- param_type = parameter_name.substr(0, pos);
- }
-
- if (param_type == "max") {
- return max_;
- }
- if (param_type == "min") {
- return min_;
- }
- if (param_type == "max_min") {
- return max_ - min_;
- }
- if (param_type == "mean") {
- return current_mean_variance_.GetMean();
- }
- if (param_type == "sd") {
- return current_mean_variance_.GetStandardDeviation();
- }
- if (param_type == "abs_mean") {
- if (means_.find("abs_current_mean") != means_.end()) {
- return means_["abs_current_mean"]->GetMean();
- }
- }
- if (param_type == "abs_mean_update_ratio" && prev_tensor_ptr_) {
- if (means_.find("curr_prev_diff_mean") != means_.end() && means_.find("abs_prev_mean") != means_.end()) {
- return means_["curr_prev_diff_mean"]->GetMean() / (means_["abs_prev_mean"]->GetMean() + epsilon_);
- }
- }
- if (param_type == "range_percentage") {
- if (range_counts_.find(wp.id) != range_counts_.end()) {
- return range_counts_[wp.id]->GetPercentInRange();
- }
- }
- if (param_type == "zero_percentage") {
- return GetZeroValPercent();
- }
- return std::numeric_limits<double_t>::quiet_NaN();
- }
-
- template <typename T>
- double_t TensorSummary<T>::StatLookup(const DebugServices::watchpoint_t &wp) {
- CONDITION_TYPE type = wp.condition.type;
- if (type == CONDITION_TYPE::MAX_LT || type == CONDITION_TYPE::MAX_GT) {
- return max_;
- }
- if (type == CONDITION_TYPE::MIN_LT || type == CONDITION_TYPE::MIN_GT) {
- return min_;
- }
- if (type == CONDITION_TYPE::MEAN_LT || type == CONDITION_TYPE::MEAN_GT) {
- return current_mean_variance_.GetMean();
- }
- if (type == CONDITION_TYPE::SD_LT || type == CONDITION_TYPE::SD_GT) {
- return current_mean_variance_.GetStandardDeviation();
- }
- if (type == CONDITION_TYPE::MAX_MIN_GT || type == CONDITION_TYPE::MAX_MIN_LT) {
- return max_ - min_;
- }
- return std::numeric_limits<double_t>::quiet_NaN();
- }
-
- template <typename T>
- double_t TensorSummary<T>::GetZeroValPercent() {
- if (num_elements_ == 0) {
- return 0;
- }
-
- return (zero_count_ * 100.0) / num_elements_;
- }
-
- template <typename T>
- void TensorSummary<T>::InitCalculators(const std::vector<DebugServices::watchpoint_t> &wps) {
- for (auto &wp : wps) {
- auto wp_id = wp.id;
- mean_sd_cal_enabled_ = mean_sd_cal_enabled_ || wp.mean_sd_enabled();
- if (wp.allclose_enabled() && prev_tensor_ptr_) {
- all_close_[wp_id] = std::make_unique<AllCloseCalculator>();
- if (!wp.parameter_list[0].disabled) {
- all_close_[wp_id]->set_atol(wp.parameter_list[0].value);
- }
- if (!wp.parameter_list[1].disabled) {
- all_close_[wp_id]->set_rtol(wp.parameter_list[1].value);
- }
- } else if (wp.range_enabled()) {
- range_counts_[wp_id] = std::make_unique<RangeCountCalculator>();
- if (!wp.parameter_list[0].disabled) {
- range_counts_[wp_id]->set_range_start_inclusive(wp.parameter_list[0].value);
- }
- if (!wp.parameter_list[1].disabled) {
- range_counts_[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
- }
- } else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr_) {
- (void)means_.emplace("curr_prev_diff_mean", std::make_unique<MeanCalculator>());
- (void)means_.emplace("abs_prev_mean", std::make_unique<MeanCalculator>());
- } else if (wp.abs_mean_enabled()) {
- (void)means_.emplace("abs_current_mean", std::make_unique<MeanCalculator>());
- }
- }
- }
- template class TensorSummary<uint8_t>;
- template class TensorSummary<int8_t>;
- template class TensorSummary<uint16_t>;
- template class TensorSummary<int16_t>;
- template class TensorSummary<uint32_t>;
- template class TensorSummary<int32_t>;
- template class TensorSummary<uint64_t>;
- template class TensorSummary<int64_t>;
- template class TensorSummary<float16>;
- template class TensorSummary<float>;
- template class TensorSummary<double>;
- template class TensorSummary<bool>;
- #ifdef ONLINE_DBG_MODE
- } // namespace mindspore
- #endif
|