Browse Source

!29703 Fix codex warning for dump and debugger in master

Merge pull request !29703 from TinaMengtingZhang/codex
feature/build-system-rewrite
i-robot Gitee 4 years ago
parent
commit
b9ee1433e2
No known key found for this signature in database GPG Key ID: 173E9B9CA92EEF8F
18 changed files with 153 additions and 172 deletions
  1. +2
    -1
      mindspore/ccsrc/debug/common.cc
  2. +2
    -2
      mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
  3. +0
    -1
      mindspore/ccsrc/debug/data_dump/e2e_dump.h
  4. +35
    -67
      mindspore/ccsrc/debug/debug_services.cc
  5. +15
    -15
      mindspore/ccsrc/debug/debug_services.h
  6. +6
    -6
      mindspore/ccsrc/debug/debugger/debugger.cc
  7. +14
    -9
      mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc
  8. +18
    -16
      mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.h
  9. +2
    -0
      mindspore/ccsrc/debug/debugger/offline_debug/mi_pybind_register.cc
  10. +1
    -1
      mindspore/ccsrc/debug/debugger/proto_exporter.cc
  11. +1
    -1
      mindspore/ccsrc/debug/debugger/tensor_summary.cc
  12. +24
    -24
      mindspore/ccsrc/debug/debugger/tensor_summary.h
  13. +3
    -3
      mindspore/ccsrc/debug/dump_proto.cc
  14. +2
    -2
      mindspore/ccsrc/debug/env_config_parser.h
  15. +13
    -14
      mindspore/ccsrc/debug/tensor_data.h
  16. +11
    -8
      mindspore/ccsrc/debug/tensor_load.h
  17. +3
    -2
      mindspore/ccsrc/utils/summary/event_writer.cc
  18. +1
    -0
      mindspore/ccsrc/utils/summary/event_writer.h

+ 2
- 1
mindspore/ccsrc/debug/common.cc View File

@@ -61,7 +61,8 @@ std::optional<std::string> Common::CreatePrefixPath(const std::string &input_pat
return std::string(prefix_path_str + "/" + file_name_str);
}

bool Common::CommonFuncForConfigPath(const std::string &default_path, const std::string &env_path, std::string *value) {
bool Common::CommonFuncForConfigPath(const std::string &default_path, const std::string &env_path,
std::string *const value) {
MS_EXCEPTION_IF_NULL(value);
value->clear();
if (!env_path.empty()) {


+ 2
- 2
mindspore/ccsrc/debug/data_dump/dump_json_parser.cc View File

@@ -512,7 +512,7 @@ void DumpJsonParser::ParseInputOutput(const nlohmann::json &content) {
CheckJsonUnsignedType(content, kInputOutput);
input_output_ = content;
const uint32_t max_inout_num = 2;
if (input_output_ < 0 || input_output_ > max_inout_num) {
if (input_output_ > max_inout_num) {
MS_LOG(EXCEPTION) << "Dump Json Parse Failed. input_output should be 0, 1, 2";
}
}
@@ -557,7 +557,7 @@ void DumpJsonParser::ParseOpDebugMode(const nlohmann::json &content) {
CheckJsonUnsignedType(content, kOpDebugMode);
op_debug_mode_ = content;
const size_t max_mode = 3;
if (op_debug_mode_ < 0 || op_debug_mode_ > max_mode) {
if (op_debug_mode_ > max_mode) {
MS_LOG(EXCEPTION) << "Dump Json Parse Failed. op_debug_mode should be 0, 1, 2, 3";
}
}


+ 0
- 1
mindspore/ccsrc/debug/data_dump/e2e_dump.h View File

@@ -18,7 +18,6 @@
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_H_

#include <dirent.h>
#include <stdlib.h>
#include <map>
#include <string>



+ 35
- 67
mindspore/ccsrc/debug/debug_services.cc View File

@@ -53,6 +53,16 @@ constexpr char *kStrErrorNone = nullptr;
#endif
} // namespace

bool IsRegFile(const std::string &file_path) {
struct stat st;
int ret = stat(file_path.c_str(), &st);
if (ret != 0) {
MS_LOG(ERROR) << "stat error for " << file_path << ", ret is: " << ret;
return false;
}
return S_ISREG(st.st_mode);
}

// Default constructor: allocates a fresh TensorLoader and stores it in tensor_loader_.
DebugServices::DebugServices() {
  tensor_loader_ = std::make_shared<TensorLoader>();
}

DebugServices::DebugServices(const DebugServices &other) {
@@ -80,7 +90,7 @@ DebugServices &DebugServices::operator=(const DebugServices &other) {
* watchpoint_table.
*/
void DebugServices::AddWatchpoint(
unsigned int id, unsigned int watch_condition, float parameter,
unsigned int id, int watch_condition, float parameter,
const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> &parameter_list,
const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_device_list,
const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_graph_list) {
@@ -116,8 +126,8 @@ void DebugServices::RemoveWatchpoint(unsigned int id) {
* not supported.
*/
std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData> &tensor,
const void *const previous_tensor_ptr, uint32_t num_elements,
uint32_t prev_num_elements, int tensor_dtype) {
const void *const previous_tensor_ptr, uint64_t num_elements,
uint64_t prev_num_elements, int tensor_dtype) {
MS_EXCEPTION_IF_NULL(tensor);
switch (tensor_dtype) {
case DbgDataType::DT_UINT8: {
@@ -217,7 +227,7 @@ DebugServices::TensorStat DebugServices::GetTensorStatistics(const std::shared_p
* run iteration for tensor's graph.
*/
const void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed,
uint32_t *prev_num_elements, bool *history_not_found) {
uint64_t *prev_num_elements, bool *history_not_found) {
MS_EXCEPTION_IF_NULL(tensor);
const void *previous_tensor_ptr = nullptr;
std::shared_ptr<TensorData> tensor_prev;
@@ -439,7 +449,7 @@ bool DebugServices::CompareCurrentRootGraph(uint32_t id) {
* Description: Returns the previous tensor pointer if the current root graph id is equal to previous root graph id and
* prev_tensor_data is not nullptr.
*/
const void *DebugServices::PreparePrevTensor(uint32_t *prev_num_elements, const std::string &tensor_name) {
const void *DebugServices::PreparePrevTensor(uint64_t *prev_num_elements, const std::string &tensor_name) {
std::shared_ptr<TensorData> prev_tensor_data;
if (!CompareCurrentRootGraph(Debugger::GetInstance()->GetPrevRootGraphId())) {
// not supporting watchpoints that need prev tensor for multi root graph networks.
@@ -526,8 +536,8 @@ void DebugServices::CheckWatchpointsForTensor(
}
(*chunk_tensor_byte_size)[chunk_id] += tensor->GetByteSize();
int tensor_dtype = tensor->GetType();
uint32_t num_elements = tensor->GetNumElements();
uint32_t prev_num_elements = 0;
uint64_t num_elements = tensor->GetNumElements();
uint64_t prev_num_elements = 0;
const void *previous_tensor_ptr = nullptr;
#ifdef OFFLINE_DBG_MODE
bool history_not_found = 0;
@@ -875,15 +885,8 @@ void DebugServices::ProcessConvertToHostFormat(const std::vector<std::string> &f
}
struct dirent *dir = nullptr;
while ((dir = readdir(d_handle)) != nullptr) {
struct stat st;
std::string name = real_dump_iter_dir + std::string("/") + std::string(dir->d_name);
int ret = stat(name.c_str(), &st);
if (ret != 0) {
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d_handle);
return;
}
if (S_ISREG(st.st_mode)) {
if (IsRegFile(name)) {
std::string candidate = dir->d_name;
for (const std::string &file_to_find : files_after_convert_in_dir) {
std::string file_n = file_to_find;
@@ -991,18 +994,11 @@ void DebugServices::ProcessConvertList(const std::string &prefix_dump_file_name,
DIR *d = opendir(specific_dump_dir.c_str());
struct dirent *dir = nullptr;
while ((dir = readdir(d)) != nullptr) {
struct stat st;
std::string name = specific_dump_dir + std::string("/") + std::string(dir->d_name);
int ret = stat(name.c_str(), &st);
if (ret != 0) {
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d);
return;
}
if (!(S_ISREG(st.st_mode))) {
std::string file_name = dir->d_name;
std::string file_path = specific_dump_dir + std::string("/") + file_name;
if (!IsRegFile(file_path)) {
continue;
}
std::string file_name = dir->d_name;
std::string file_name_w_o_perfix = file_name;
auto type_pos = file_name.find('.');
// adding dot to avoid problematic matching in the scope.
@@ -1018,8 +1014,7 @@ void DebugServices::ProcessConvertList(const std::string &prefix_dump_file_name,
} else {
// otherwise, if file matches prefix and already has been converted to host format
// add to result of converted files.
std::string found_file = specific_dump_dir + "/" + file_name;
result_list->insert(found_file);
result_list->insert(file_path);
}
}
(void)closedir(d);
@@ -1466,16 +1461,9 @@ void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_nam
} else {
struct dirent *dir = nullptr;
while ((dir = readdir(d)) != nullptr) {
struct stat st;
std::string name = abspath + std::string("/") + std::string(dir->d_name);
int ret = stat(name.c_str(), &st);
if (ret != 0) {
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d);
return;
}
if (S_ISREG(st.st_mode)) {
std::string file_name = dir->d_name;
std::string file_name = dir->d_name;
std::string file_path = abspath + std::string("/") + file_name;
if (IsRegFile(file_path)) {
std::string stripped_file_name = GetStrippedFilename(file_name);
if (stripped_file_name.empty()) {
continue;
@@ -1484,8 +1472,7 @@ void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_nam
if (found != 0) {
continue;
}
std::string full_path = specific_dump_dir + "/" + file_name;
matched_paths.push_back(full_path);
matched_paths.push_back(file_path);
found_file = true;
}
}
@@ -1647,16 +1634,9 @@ void DebugServices::ProcessTensorDataSync(const std::vector<std::tuple<std::stri
} else {
struct dirent *dir = nullptr;
while ((dir = readdir(d)) != nullptr) {
struct stat st;
std::string name = specific_dump_dir + std::string("/") + std::string(dir->d_name);
int ret = stat(name.c_str(), &st);
if (ret != 0) {
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d);
return;
}
if (S_ISREG(st.st_mode)) {
std::string file_name = dir->d_name;
std::string file_name = dir->d_name;
std::string file_path = specific_dump_dir + std::string("/") + file_name;
if (IsRegFile(file_path)) {
for (auto &node : proto_to_dump) {
std::string dump_name = std::get<1>(node);
std::string stripped_file_name = GetStrippedFilename(file_name);
@@ -1882,7 +1862,7 @@ std::string GetOnlineOpOverflowDir() {
return overflow_bin_path;
}

void DebugServices::AddOpOverflowOpNames(const std::string overflow_bin_path, std::vector<std::string> *op_names) {
void DebugServices::AddOpOverflowOpNames(const std::string &overflow_bin_path, std::vector<std::string> *op_names) {
MS_EXCEPTION_IF_NULL(op_names);
std::map<std::pair<uint64_t, uint64_t>, std::string> task_stream_to_opname;
std::vector<std::pair<uint64_t, uint64_t>> task_stream_hit;
@@ -1896,18 +1876,9 @@ void DebugServices::AddOpOverflowOpNames(const std::string overflow_bin_path, st
} else {
struct dirent *dir = nullptr;
while ((dir = readdir(d)) != nullptr) {
struct stat st;
std::string name = overflow_bin_path + std::string("/") + std::string(dir->d_name);
int ret = stat(name.c_str(), &st);
if (ret != 0) {
MS_LOG(ERROR) << "stat error, ret is: " << ret;
(void)closedir(d);
return;
}
if (S_ISREG(st.st_mode)) {
// form fully qualified filename
std::string file_path = name;
std::string file_name = dir->d_name;
std::string file_name = dir->d_name;
std::string file_path = overflow_bin_path + std::string("/") + file_name;
if (IsRegFile(file_path)) {
// attempt to read the file
std::ifstream infile;
infile.open(file_path.c_str(), std::ios::ate | std::ios::binary | std::ios::in);
@@ -2082,7 +2053,6 @@ bool DebugServices::GetAttrsFromFilename(const std::string &file_name, std::stri
fourth_dot == std::string::npos) {
return false;
}

// get node_name
if (first_dot < second_dot) {
*node_name = file_name.substr(first_dot + 1, second_dot - first_dot - 1);
@@ -2090,7 +2060,6 @@ bool DebugServices::GetAttrsFromFilename(const std::string &file_name, std::stri
MS_LOG(ERROR) << "filename parse error to get node_name.";
return false;
}

// get task id
if (second_dot < third_dot) {
std::string extracted_task_id = file_name.substr(second_dot + 1, third_dot - second_dot - 1);
@@ -2104,10 +2073,9 @@ bool DebugServices::GetAttrsFromFilename(const std::string &file_name, std::stri
return false;
}
} else {
MS_LOG(ERROR) << "filename parse error to get task_id.";
MS_LOG(ERROR) << "Filename <" << file_name << "> parse error to get task_id.";
return false;
}

// get stream id
if (third_dot < fourth_dot) {
std::string extracted_stream_id = file_name.substr(third_dot + 1, fourth_dot - third_dot - 1);
@@ -2121,7 +2089,7 @@ bool DebugServices::GetAttrsFromFilename(const std::string &file_name, std::stri
return false;
}
} else {
MS_LOG(ERROR) << "filename parse error to get stream_id.";
MS_LOG(ERROR) << "Filename <" << file_name << "> parse error to get stream_id.";
return false;
}



+ 15
- 15
mindspore/ccsrc/debug/debug_services.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -132,7 +132,7 @@ class DebugServices {

std::string FindQualifiedTensorName(const std::string &tensor_name, unsigned const int &tensor_device_id,
unsigned const int &tensor_root_graph_id) const {
int indx = 0;
size_t indx = 0;
for (auto check_node : check_node_list) {
std::string w_name = std::get<0>(check_node);
bool w_type = std::get<1>(check_node);
@@ -204,8 +204,8 @@ class DebugServices {

struct TensorStat {
TensorStat(uint64_t data_size, int dtype, const std::vector<int64_t> &shape, bool is_bool, double max_value,
double min_value, double avg_value, int count, int neg_zero_count, int pos_zero_count, int nan_count,
int neg_inf_count, int pos_inf_count, int zero_count)
double min_value, double avg_value, uint64_t count, uint64_t neg_zero_count, uint64_t pos_zero_count,
uint64_t nan_count, uint64_t neg_inf_count, uint64_t pos_inf_count, uint64_t zero_count)
: data_size(data_size),
dtype(dtype),
shape(shape),
@@ -230,19 +230,19 @@ class DebugServices {
double max_value = std::numeric_limits<double>::lowest();
double min_value = std::numeric_limits<double>::max();
double avg_value = 0.0;
int count = 0;
int neg_zero_count = 0;
int pos_zero_count = 0;
int nan_count = 0;
int neg_inf_count = 0;
int pos_inf_count = 0;
int zero_count = 0;
uint64_t count = 0;
uint64_t neg_zero_count = 0;
uint64_t pos_zero_count = 0;
uint64_t nan_count = 0;
uint64_t neg_inf_count = 0;
uint64_t pos_inf_count = 0;
uint64_t zero_count = 0;
};

static TensorStat GetTensorStatistics(const std::shared_ptr<TensorData> &tensor);

void AddWatchpoint(
unsigned int id, unsigned int watch_condition, float parameter,
unsigned int id, int watch_condition, float parameter,
const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> &parameter_list,
const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_device_list = nullptr,
const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_graph_list = nullptr);
@@ -263,7 +263,7 @@ class DebugServices {
const std::vector<parameter_t> &parameter_list);
#endif

const void *PreparePrevTensor(uint32_t *prev_num_elements, const std::string &tensor_name);
const void *PreparePrevTensor(uint64_t *prev_num_elements, const std::string &tensor_name);

void CheckHistoryErrorCode(int *error_code, bool history_not_found);

@@ -279,7 +279,7 @@ class DebugServices {
std::vector<unsigned int> *device_id, std::vector<unsigned int> *root_graph_id,
bool error_on_no_value = false);

void AddOpOverflowOpNames(const std::string overflow_bin_path, std::vector<std::string> *op_names);
void AddOpOverflowOpNames(const std::string &overflow_bin_path, std::vector<std::string> *op_names);

void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition,
std::vector<unsigned int> *const watchpoint_id,
@@ -363,7 +363,7 @@ class DebugServices {
bool error_on_no_value = false);

const void *GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed,
uint32_t *prev_num_elements, bool *history_not_found);
uint64_t *prev_num_elements, bool *history_not_found);

void ReadTensorFromNpy(const std::string &tensor_name, const std::string &file_name, std::string *const tensor_type,
std::size_t *const size, std::vector<int64_t> *const shape,


+ 6
- 6
mindspore/ccsrc/debug/debugger/debugger.cc View File

@@ -770,7 +770,7 @@ void Debugger::SendHeartbeat(int32_t period) {
while (enable_heartbeat_) {
MS_EXCEPTION_IF_NULL(grpc_client_);
EventReply reply = grpc_client_->SendHeartbeat(heartbeat);
if (reply.status() != reply.OK) {
if (reply.status() != EventReply::OK) {
MS_LOG(ERROR) << "Error: SendHeartbeat failed";
num_heartbeat_fail++;
if (num_heartbeat_fail >= max_num_heartbeat_fail) {
@@ -801,7 +801,7 @@ void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) {
// send graph to MindInsight server
MS_EXCEPTION_IF_NULL(grpc_client_);
EventReply reply = grpc_client_->SendGraph(graph_proto);
if (reply.status() != reply.OK) {
if (reply.status() != EventReply::OK) {
MS_LOG(ERROR) << "Error: SendGraph failed";
}
// enter command loop, wait and process commands
@@ -827,7 +827,7 @@ bool Debugger::SendMetadata(bool version_check) {
EventReply reply_metadata = grpc_client_->SendMetadata(metadata);

bool ret = false;
if (reply_metadata.status() == reply_metadata.OK) {
if (reply_metadata.status() == EventReply::OK) {
if (version_check) {
// get type of the command in meta data reply, it should be version matched
DebuggerCommand cmd = GetCommand(reply_metadata);
@@ -885,7 +885,7 @@ void Debugger::SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_prot
}
}
EventReply reply = grpc_client_->SendMultiGraphs(chunked_graph_proto_list);
if (reply.status() != reply.OK) {
if (reply.status() != EventReply::OK) {
MS_LOG(ERROR) << "Error: SendGraph failed";
}
// enter command loop, wait and process commands
@@ -923,7 +923,7 @@ void Debugger::CommandLoop() {
// wait for command
MS_EXCEPTION_IF_NULL(grpc_client_);
EventReply reply = grpc_client_->WaitForCommand(metadata);
if (reply.status() != reply.OK) {
if (reply.status() != EventReply::OK) {
MS_LOG(ERROR) << "Error: WaitForCommand failed";
num_wait_fail++;
if (num_wait_fail > max_num_wait_fail) {
@@ -1327,7 +1327,7 @@ void Debugger::SendWatchpoints(const std::list<WatchpointHit> &points) {
if (!points.empty()) {
MS_EXCEPTION_IF_NULL(grpc_client_);
EventReply reply = grpc_client_->SendWatchpointHits(points);
if (reply.status() != reply.OK) {
if (reply.status() != EventReply::OK) {
MS_LOG(ERROR) << "Error: SendWatchpointHits failed";
}
}


+ 14
- 9
mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc View File

@@ -18,6 +18,7 @@
#include <algorithm>
#include <chrono>

namespace mindspore {
// Default constructor: allocates a fresh DebugServices instance and stores it in debug_services_.
DbgServices::DbgServices() {
  debug_services_ = std::make_shared<DebugServices>();
}

DbgServices::DbgServices(const DbgServices &other) {
@@ -34,10 +35,7 @@ DbgServices &DbgServices::operator=(const DbgServices &other) {
}

#if !defined(__APPLE__)
DbgServices::~DbgServices() noexcept {
MS_LOG(INFO) << "cpp DbgServices object is deleted";
debug_services_ = nullptr;
}
DbgServices::~DbgServices() { ClearData(); }
#else
DbgServices::~DbgServices() {
MS_LOG(INFO) << "cpp DbgServices object is deleted";
@@ -45,6 +43,11 @@ DbgServices::~DbgServices() {
}
#endif

// Logs that the DbgServices object is being deleted, then drops this object's
// reference to the DebugServices instance held in debug_services_.
void DbgServices::ClearData() noexcept {
  MS_LOG(INFO) << "cpp DbgServices object is deleted";
  debug_services_.reset();
}

std::string DbgServices::GetVersion() const {
MS_LOG(INFO) << "get version is called";
return MSVERSION;
@@ -74,7 +77,7 @@ int32_t DbgServices::Initialize(const std::string net_name, const std::string du
}

int32_t DbgServices::AddWatchpoint(
unsigned int id, unsigned int watch_condition,
unsigned int id, int watch_condition,
std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes,
std::vector<parameter_t> parameter_list) {
MS_EXCEPTION_IF_NULL(debug_services_);
@@ -93,7 +96,7 @@ int32_t DbgServices::AddWatchpoint(
std::vector<std::uint32_t> rank_id;
(void)std::transform(
rank_id_str.begin(), rank_id_str.end(), std::back_inserter(rank_id),
[](std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); });
[](const std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); });
MS_LOG(DEBUG) << "cpp DbgServices AddWatchpoint rank_id: ";
for (auto const &i : rank_id) {
MS_LOG(DEBUG) << i << " ";
@@ -103,7 +106,7 @@ int32_t DbgServices::AddWatchpoint(
std::vector<std::uint32_t> root_graph_id;
(void)std::transform(
root_graph_id_str.begin(), root_graph_id_str.end(), std::back_inserter(root_graph_id),
[](std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); });
[](const std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); });
MS_LOG(DEBUG) << "cpp DbgServices AddWatchpoint root_graph_id: ";
for (auto const &j : root_graph_id) {
MS_LOG(DEBUG) << j << " ";
@@ -293,7 +296,8 @@ std::vector<tensor_data_t> DbgServices::ReadTensors(const std::vector<tensor_inf
result_list = ReadTensorsUtil(info);
for (auto result : result_list) {
MS_EXCEPTION_IF_NULL(result);
tensor_data_t tensor_data_item(result->GetDataPtr(), result->GetByteSize(), result->GetType(), result->GetShape());
tensor_data_t tensor_data_item(result->GetDataPtr(), result->GetByteSize(), static_cast<int>(result->GetType()),
result->GetShape());
tensors_read.push_back(tensor_data_item);
}
return tensors_read;
@@ -310,7 +314,7 @@ std::vector<TensorBaseData> DbgServices::ReadTensorsBase(const std::vector<tenso
tensors_read_base.push_back(tensor_data_item);
continue;
}
TensorBaseData tensor_data_item(result->GetByteSize(), result->GetType(), result->GetShape());
TensorBaseData tensor_data_item(result->GetByteSize(), static_cast<int>(result->GetType()), result->GetShape());
tensors_read_base.push_back(tensor_data_item);
}
return tensors_read_base;
@@ -346,3 +350,4 @@ std::vector<TensorStatData> DbgServices::ReadTensorsStat(const std::vector<tenso

return tensors_read_stat;
}
} // namespace mindspore

+ 18
- 16
mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.h View File

@@ -32,6 +32,7 @@
namespace py = pybind11;
namespace common = mindspore::common;

namespace mindspore {
struct parameter_t {
parameter_t(const std::string &name, bool disabled, double value, bool hit, double actual_value)
: name(name), disabled(disabled), value(value), hit(hit), actual_value(actual_value) {}
@@ -157,13 +158,13 @@ struct TensorStatData {
const double max_value() const { return max_value_; }
const double min_value() const { return min_value_; }
const double avg_value() const { return avg_value_; }
const int count() const { return count_; }
const int neg_zero_count() const { return neg_zero_count_; }
const int pos_zero_count() const { return pos_zero_count_; }
const int nan_count() const { return nan_count_; }
const int neg_inf_count() const { return neg_inf_count_; }
const int pos_inf_count() const { return pos_inf_count_; }
const int zero_count() const { return zero_count_; }
const uint64_t count() const { return count_; }
const uint64_t neg_zero_count() const { return neg_zero_count_; }
const uint64_t pos_zero_count() const { return pos_zero_count_; }
const uint64_t nan_count() const { return nan_count_; }
const uint64_t neg_inf_count() const { return neg_inf_count_; }
const uint64_t pos_inf_count() const { return pos_inf_count_; }
const uint64_t zero_count() const { return zero_count_; }

uint64_t data_size_;
int dtype_;
@@ -172,13 +173,13 @@ struct TensorStatData {
double max_value_;
double min_value_;
double avg_value_;
int count_;
int neg_zero_count_;
int pos_zero_count_;
int nan_count_;
int neg_inf_count_;
int pos_inf_count_;
int zero_count_;
uint64_t count_;
uint64_t neg_zero_count_;
uint64_t pos_zero_count_;
uint64_t nan_count_;
uint64_t neg_inf_count_;
uint64_t pos_inf_count_;
uint64_t zero_count_;
};

class DbgServices {
@@ -195,7 +196,7 @@ class DbgServices {
uint64_t max_mem_usage);

int32_t AddWatchpoint(
unsigned int id, unsigned int watch_condition,
unsigned int id, int watch_condition,
std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes,
std::vector<parameter_t> parameter_list);

@@ -215,6 +216,7 @@ class DbgServices {

private:
std::shared_ptr<DebugServices> debug_services_ = nullptr;
void ClearData() noexcept;
};
} // namespace mindspore
#endif // DEBUG_DBG_SERVICES_H_

+ 2
- 0
mindspore/ccsrc/debug/debugger/offline_debug/mi_pybind_register.cc View File

@@ -18,6 +18,7 @@
#include "pybind11/stl_bind.h"
#include "debugger/offline_debug/dbg_services.h"

namespace mindspore {
PYBIND11_MODULE(_mindspore_offline_debug, m) {
m.doc() = "pybind11 debug services api";
(void)py::class_<DbgServices>(m, "DbgServices")
@@ -90,3 +91,4 @@ PYBIND11_MODULE(_mindspore_offline_debug, m) {
.def("pos_inf_count", &TensorStatData::pos_inf_count)
.def("zero_count", &TensorStatData::zero_count);
}
} // namespace mindspore

+ 1
- 1
mindspore/ccsrc/debug/debugger/proto_exporter.cc View File

@@ -514,7 +514,7 @@ void DebuggerProtoExporter::ExportValueNodes(const std::map<AnfNodePtr, size_t>
}
}

void DebuggerProtoExporter::InitModelInfo() { model_.set_ir_version(debugger::IR_VERSION); }
void DebuggerProtoExporter::InitModelInfo() { model_.set_ir_version(static_cast<int64_t>(debugger::IR_VERSION)); }

debugger::ModelProto GetDebuggerFuncGraphProto(const FuncGraphPtr &func_graph) {
DebuggerProtoExporter exporter;


+ 1
- 1
mindspore/ccsrc/debug/debugger/tensor_summary.cc View File

@@ -92,7 +92,7 @@ double VarianceAndMeanCalculator::GetStandardDeviation() { return sqrt(GetVarian

template <typename T>
TensorSummary<T>::TensorSummary(const void *current_tensor_ptr, const void *const previous_tensor_ptr,
uint32_t num_elements, uint32_t prev_num_elements)
uint64_t num_elements, uint64_t prev_num_elements)
: current_tensor_ptr_(reinterpret_cast<const T *>(current_tensor_ptr)),
prev_tensor_ptr_(reinterpret_cast<const T *>(previous_tensor_ptr)),
num_elements_(num_elements),


+ 24
- 24
mindspore/ccsrc/debug/debugger/tensor_summary.h View File

@@ -105,13 +105,13 @@ class ITensorSummary {
virtual const double max_value() const = 0;
virtual const double min_value() const = 0;
virtual const double avg_value() const = 0;
virtual const int count() const = 0;
virtual const int neg_zero_count() const = 0;
virtual const int pos_zero_count() const = 0;
virtual const int nan_count() const = 0;
virtual const int neg_inf_count() const = 0;
virtual const int pos_inf_count() const = 0;
virtual const int zero_count() const = 0;
virtual const uint64_t count() const = 0;
virtual const uint64_t neg_zero_count() const = 0;
virtual const uint64_t pos_zero_count() const = 0;
virtual const uint64_t nan_count() const = 0;
virtual const uint64_t neg_inf_count() const = 0;
virtual const uint64_t pos_inf_count() const = 0;
virtual const uint64_t zero_count() const = 0;
};

template <typename T>
@@ -119,7 +119,7 @@ class TensorSummary : public ITensorSummary {
public:
TensorSummary() = default;
~TensorSummary() override = default;
TensorSummary(const void *, const void *, uint32_t, uint32_t);
TensorSummary(const void *, const void *, uint64_t, uint64_t);
void SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &) override;
// returns hit, error_code, parameter_list
std::tuple<bool, int, std::vector<DebugServices::parameter_t>> IsWatchpointHit(DebugServices::watchpoint_t) override;
@@ -128,30 +128,30 @@ class TensorSummary : public ITensorSummary {
const double max_value() const override { return max_; }
const double min_value() const override { return min_; }
const double avg_value() const override { return avg_; }
const int count() const override { return num_elements_; }
const int neg_zero_count() const override { return neg_zero_count_; }
const int pos_zero_count() const override { return pos_zero_count_; }
const int nan_count() const override { return nan_count_; }
const int neg_inf_count() const override { return neg_inf_count_; }
const int pos_inf_count() const override { return pos_inf_count_; }
const int zero_count() const override { return zero_count_; }
const uint64_t count() const override { return num_elements_; }
const uint64_t neg_zero_count() const override { return neg_zero_count_; }
const uint64_t pos_zero_count() const override { return pos_zero_count_; }
const uint64_t nan_count() const override { return nan_count_; }
const uint64_t neg_inf_count() const override { return neg_inf_count_; }
const uint64_t pos_inf_count() const override { return pos_inf_count_; }
const uint64_t zero_count() const override { return zero_count_; }

private:
const T *current_tensor_ptr_;
const T *prev_tensor_ptr_;
uint32_t num_elements_;
uint32_t prev_num_elements_;
uint64_t num_elements_;
uint64_t prev_num_elements_;
double min_;
double max_;
double avg_;
bool is_bool_;
uint32_t neg_zero_count_;
uint32_t pos_zero_count_;
uint32_t pos_inf_count_;
uint32_t neg_inf_count_;
uint32_t inf_count_;
uint32_t nan_count_;
uint32_t zero_count_;
uint64_t neg_zero_count_;
uint64_t pos_zero_count_;
uint64_t pos_inf_count_;
uint64_t neg_inf_count_;
uint64_t inf_count_;
uint64_t nan_count_;
uint64_t zero_count_;
double epsilon_;
bool mean_sd_cal_enabled_;
VarianceAndMeanCalculator current_mean_variance_;


+ 3
- 3
mindspore/ccsrc/debug/dump_proto.cc View File

@@ -125,7 +125,7 @@ void CheckIfValidType(const TypePtr &type) {
}
}

void SetTensorType(const TypePtr &type, const BaseShapePtr &shape, irpb::TypeProto *type_proto) {
void SetTensorType(const TypePtr &type, const BaseShapePtr &shape, irpb::TypeProto *const type_proto) {
TypePtr elem_type = dyn_cast<TensorType>(type)->element();
type_proto->mutable_tensor_type()->set_elem_type(GetNumberDataType(elem_type));
type_proto->set_data_type(irpb::DT_TENSOR);
@@ -189,7 +189,7 @@ void ProtoExporter::SetNodeOutputType(const AnfNodePtr &node, irpb::TypeProto *t
SetNodeOutputType(node->Type(), node->Shape(), type_proto);
}

void ProtoExporter::SetValueToProtoBasicTypes(const ValuePtr &val, irpb::ValueProto *value_proto) {
void ProtoExporter::SetValueToProtoBasicTypes(const ValuePtr &val, irpb::ValueProto *const value_proto) {
if (val->isa<StringImm>()) {
const StringImmPtr &value = dyn_cast<StringImm>(val);
value_proto->set_dtype(irpb::DT_STRING);
@@ -580,7 +580,7 @@ void ProtoExporter::ExportValueNodes(const std::map<AnfNodePtr, size_t> &const_m
}
}

void ProtoExporter::InitModelInfo() { model_.set_ir_version(irpb::IR_VERSION); }
void ProtoExporter::InitModelInfo() { model_.set_ir_version(static_cast<int64_t>(irpb::IR_VERSION)); }

std::string GetFuncGraphProtoString(const FuncGraphPtr &func_graph) {
ProtoExporter exporter;


+ 2
- 2
mindspore/ccsrc/debug/env_config_parser.h View File

@@ -28,7 +28,7 @@ enum RdrModes : int { Exceptional = 1, Normal = 2 };
class EnvConfigParser {
public:
static EnvConfigParser &GetInstance() {
static EnvConfigParser instance;
static EnvConfigParser instance = EnvConfigParser();
instance.Parse();
return instance;
}
@@ -42,7 +42,7 @@ class EnvConfigParser {
int RdrMode() const { return rdr_mode_; }
std::string RdrPath() const { return rdr_path_; }
#endif
bool GetSysMemreuse() { return sys_memreuse_; }
bool GetSysMemreuse() const { return sys_memreuse_; }
void SetSysMemreuse(bool set_memreuse) { sys_memreuse_ = set_memreuse; }

private:


+ 13
- 14
mindspore/ccsrc/debug/tensor_data.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,7 +19,6 @@
#include <algorithm>
#include <vector>
#include <string>
#include <cstring>
#include <iostream>
#include "mindspore/core/utils/log_adapter.h"
#ifdef ONLINE_DBG_MODE
@@ -162,7 +161,6 @@ class TensorData {
this->name_ = obj.name_;
this->execution_order_ = obj.execution_order_;
this->slot_ = obj.slot_;
this->data_ptr_ = obj.data_ptr_;
this->size_ = obj.size_;
this->data_type_ = obj.data_type_;
this->data_type_size_ = obj.data_type_size_;
@@ -177,18 +175,19 @@ class TensorData {
#endif
}

#ifdef OFFLINE_DBG_MODE
~TensorData() { DeleteDataPtr(); }
#else
~TensorData() {}
#endif

void DeleteDataPtr() {
if (this->data_ptr_ != NULL) {
void DeleteDataPtr() noexcept {
#ifdef ONLINE_DBG_MODE
this->tensor_ptr_ = nullptr;
this->data_ptr_ = nullptr;
#else
if (this->data_ptr_ != nullptr) {
delete this->data_ptr_;
this->data_ptr_ = NULL;
this->data_ptr_ = nullptr;
this->size_ = 0;
}
#endif
}

std::string GetName() const { return this->name_; }
@@ -206,7 +205,7 @@ class TensorData {
void SetTimeStamp(const std::string &time_stamp) { this->time_stamp_ = time_stamp; }

#ifdef ONLINE_DBG_MODE
void SetTensor(mindspore::tensor::TensorPtr out_tensor) { this->tensor_ptr_ = out_tensor; }
void SetTensor(const mindspore::tensor::TensorPtr &out_tensor) { this->tensor_ptr_ = out_tensor; }
#endif

void SetSlot(size_t slot) { this->slot_ = slot; }
@@ -215,7 +214,7 @@ class TensorData {

void SetDataPtr(char *data_ptr) { this->data_ptr_ = data_ptr; }

uint32_t GetNumElements() { return size_ / data_type_size_; }
uint64_t GetNumElements() const { return size_ / data_type_size_; }

uint64_t GetByteSize() const { return this->size_; }

@@ -223,7 +222,7 @@ class TensorData {

std::vector<int64_t> GetShape() const { return this->shape_; }

void SetShape(std::vector<int64_t> shape) { this->shape_ = shape; }
void SetShape(const std::vector<int64_t> &shape) { this->shape_ = shape; }

unsigned int GetIteration() const { return this->iteration_; }

@@ -245,7 +244,7 @@ class TensorData {

void SetType(unsigned int type) { ConvertMsToDbgType(type); }

void SetType(std::string type_name) { ConvertStringToDbgType(type_name); }
void SetType(const std::string &type_name) { ConvertStringToDbgType(type_name); }

bool GetIsOutput() const { return this->is_output_; }



+ 11
- 8
mindspore/ccsrc/debug/tensor_load.h View File

@@ -40,7 +40,7 @@ class TensorLoader {

~TensorLoader() { EmptyTensor(); }

void MoveTensorCurrentToPrev(std::string tensor_name) {
void MoveTensorCurrentToPrev(const std::string &tensor_name) {
auto handle = tensor_list_map_.extract(tensor_name);
if (!handle.empty()) {
MS_LOG(INFO) << "Moving " << tensor_name << " from current map to previous map";
@@ -50,12 +50,14 @@ class TensorLoader {

void SwapCurrentPrev() { tensor_list_map_.swap(prev_tensor_list_map_); }

bool TensorExistsInCurrent(std::string tensor_name) const {
bool TensorExistsInCurrent(const std::string &tensor_name) const {
return tensor_list_map_.find(tensor_name) != tensor_list_map_.end();
}

// only parameters will return true
bool PrevTensorExistsInCurrent(std::string tensor_name) const { return TensorExistsInCurrent(tensor_name + ":prev"); }
bool PrevTensorExistsInCurrent(const std::string &tensor_name) const {
return TensorExistsInCurrent(tensor_name + ":prev");
}

void MoveParametersCurrentToPrev() {
MS_LOG(INFO) << "Moving parameters from current map to previous map";
@@ -85,7 +87,7 @@ class TensorLoader {
* Description: Load new tensor into tensor_list_map_ (debugger backend cache). In offline debugger, add ":prev" to
* the previous tensor's name to avoid segfault caused by wrongly evicting the tensor when memory limit is enabled.
*/
bool LoadNewTensor(std::shared_ptr<TensorData> tensor, bool keep_prev) {
bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev) {
lock_.lock();
auto tensor_name = tensor->GetName();
if (keep_prev) {
@@ -98,8 +100,9 @@ class TensorLoader {
}
std::string key_name = tensor_name;
#ifdef OFFLINE_DBG_MODE
std::string output_type = tensor->GetIsOutput() ? "1" : "0";
key_name += (":" + std::to_string(tensor->GetDeviceId()) + ":" + std::to_string(tensor->GetRootGraphId()) + ":" +
std::to_string(tensor->GetIsOutput()) + ":" + std::to_string(tensor->GetSlot()));
output_type + ":" + std::to_string(tensor->GetSlot()));
if (tensor_list_map_.find(key_name) != tensor_list_map_.end() &&
tensor->GetIteration() == tensor_list_map_[key_name]->GetPrevIteration()) {
key_name += ":prev";
@@ -151,7 +154,7 @@ class TensorLoader {
}
}

void EmptyTensor() {
void EmptyTensor() noexcept {
std::lock_guard<std::mutex> lg(lock_);
prev_tensor_list_map_.clear();
tensor_list_map_.swap(prev_tensor_list_map_);
@@ -159,7 +162,7 @@ class TensorLoader {

void EmptyCurrentTensor() { tensor_list_map_.clear(); }

bool EnableMemoryControl() { return mem_total_ > 0; }
bool EnableMemoryControl() const { return mem_total_ > 0; }

/*
* Feature group: Offline debugger.
@@ -210,7 +213,7 @@ class TensorLoader {
std::unique_lock<std::mutex> lk(mem_lock_);
while (data_size > mem_total_ - mem_usage_) {
// wait until there is any not-in-use candidate to be evicted from cache
evict_cond.wait(lk, [&] { return !cache_evict_queue_.empty(); });
evict_cond.wait(lk, [this] { return !cache_evict_queue_.empty(); });
candidate_name = cache_evict_queue_.front();
cache_evict_queue_.pop_front();
// evict candidate tensor


+ 3
- 2
mindspore/ccsrc/utils/summary/event_writer.cc View File

@@ -16,7 +16,6 @@

#include "utils/summary/event_writer.h"
#include <string>
#include <memory>
#include "utils/log_adapter.h"
#include "utils/convert_utils.h"

@@ -37,7 +36,9 @@ EventWriter::EventWriter(const std::string &file_full_name) : filename_(file_ful
status_ = true;
}

EventWriter::~EventWriter() {
EventWriter::~EventWriter() { CloseFile(); }

void EventWriter::CloseFile() noexcept {
if (event_file_ != nullptr) {
bool result = Close();
if (!result) {


+ 1
- 0
mindspore/ccsrc/utils/summary/event_writer.h View File

@@ -72,6 +72,7 @@ class EventWriter {
bool WriteRecord(const std::string &data);

private:
void CloseFile() noexcept;
// True: valid / False: closed
bool status_ = false;
std::shared_ptr<FileSystem> fs_;


Loading…
Cancel
Save