Merge pull request !29703 from TinaMengtingZhang/codexfeature/build-system-rewrite
| @@ -61,7 +61,8 @@ std::optional<std::string> Common::CreatePrefixPath(const std::string &input_pat | |||
| return std::string(prefix_path_str + "/" + file_name_str); | |||
| } | |||
| bool Common::CommonFuncForConfigPath(const std::string &default_path, const std::string &env_path, std::string *value) { | |||
| bool Common::CommonFuncForConfigPath(const std::string &default_path, const std::string &env_path, | |||
| std::string *const value) { | |||
| MS_EXCEPTION_IF_NULL(value); | |||
| value->clear(); | |||
| if (!env_path.empty()) { | |||
| @@ -512,7 +512,7 @@ void DumpJsonParser::ParseInputOutput(const nlohmann::json &content) { | |||
| CheckJsonUnsignedType(content, kInputOutput); | |||
| input_output_ = content; | |||
| const uint32_t max_inout_num = 2; | |||
| if (input_output_ < 0 || input_output_ > max_inout_num) { | |||
| if (input_output_ > max_inout_num) { | |||
| MS_LOG(EXCEPTION) << "Dump Json Parse Failed. input_output should be 0, 1, 2"; | |||
| } | |||
| } | |||
| @@ -557,7 +557,7 @@ void DumpJsonParser::ParseOpDebugMode(const nlohmann::json &content) { | |||
| CheckJsonUnsignedType(content, kOpDebugMode); | |||
| op_debug_mode_ = content; | |||
| const size_t max_mode = 3; | |||
| if (op_debug_mode_ < 0 || op_debug_mode_ > max_mode) { | |||
| if (op_debug_mode_ > max_mode) { | |||
| MS_LOG(EXCEPTION) << "Dump Json Parse Failed. op_debug_mode should be 0, 1, 2, 3"; | |||
| } | |||
| } | |||
| @@ -18,7 +18,6 @@ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_H_ | |||
| #include <dirent.h> | |||
| #include <stdlib.h> | |||
| #include <map> | |||
| #include <string> | |||
| @@ -53,6 +53,16 @@ constexpr char *kStrErrorNone = nullptr; | |||
| #endif | |||
| } // namespace | |||
| bool IsRegFile(const std::string &file_path) { | |||
| struct stat st; | |||
| int ret = stat(file_path.c_str(), &st); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "stat error for " << file_path << ", ret is: " << ret; | |||
| return false; | |||
| } | |||
| return S_ISREG(st.st_mode); | |||
| } | |||
| DebugServices::DebugServices() { tensor_loader_ = std::make_shared<TensorLoader>(); } | |||
| DebugServices::DebugServices(const DebugServices &other) { | |||
| @@ -80,7 +90,7 @@ DebugServices &DebugServices::operator=(const DebugServices &other) { | |||
| * watchpoint_table. | |||
| */ | |||
| void DebugServices::AddWatchpoint( | |||
| unsigned int id, unsigned int watch_condition, float parameter, | |||
| unsigned int id, int watch_condition, float parameter, | |||
| const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> ¶meter_list, | |||
| const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_device_list, | |||
| const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_graph_list) { | |||
| @@ -116,8 +126,8 @@ void DebugServices::RemoveWatchpoint(unsigned int id) { | |||
| * not supported. | |||
| */ | |||
| std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData> &tensor, | |||
| const void *const previous_tensor_ptr, uint32_t num_elements, | |||
| uint32_t prev_num_elements, int tensor_dtype) { | |||
| const void *const previous_tensor_ptr, uint64_t num_elements, | |||
| uint64_t prev_num_elements, int tensor_dtype) { | |||
| MS_EXCEPTION_IF_NULL(tensor); | |||
| switch (tensor_dtype) { | |||
| case DbgDataType::DT_UINT8: { | |||
| @@ -217,7 +227,7 @@ DebugServices::TensorStat DebugServices::GetTensorStatistics(const std::shared_p | |||
| * run iteration for tensor's graph. | |||
| */ | |||
| const void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed, | |||
| uint32_t *prev_num_elements, bool *history_not_found) { | |||
| uint64_t *prev_num_elements, bool *history_not_found) { | |||
| MS_EXCEPTION_IF_NULL(tensor); | |||
| const void *previous_tensor_ptr = nullptr; | |||
| std::shared_ptr<TensorData> tensor_prev; | |||
| @@ -439,7 +449,7 @@ bool DebugServices::CompareCurrentRootGraph(uint32_t id) { | |||
| * Description: Returns the previous tensor pointer if the current root graph id is equal to previous root graph id and | |||
| * prev_tensor_data is not nullptr. | |||
| */ | |||
| const void *DebugServices::PreparePrevTensor(uint32_t *prev_num_elements, const std::string &tensor_name) { | |||
| const void *DebugServices::PreparePrevTensor(uint64_t *prev_num_elements, const std::string &tensor_name) { | |||
| std::shared_ptr<TensorData> prev_tensor_data; | |||
| if (!CompareCurrentRootGraph(Debugger::GetInstance()->GetPrevRootGraphId())) { | |||
| // not supporting watchpoints that need prev tensor for multi root graph networks. | |||
| @@ -526,8 +536,8 @@ void DebugServices::CheckWatchpointsForTensor( | |||
| } | |||
| (*chunk_tensor_byte_size)[chunk_id] += tensor->GetByteSize(); | |||
| int tensor_dtype = tensor->GetType(); | |||
| uint32_t num_elements = tensor->GetNumElements(); | |||
| uint32_t prev_num_elements = 0; | |||
| uint64_t num_elements = tensor->GetNumElements(); | |||
| uint64_t prev_num_elements = 0; | |||
| const void *previous_tensor_ptr = nullptr; | |||
| #ifdef OFFLINE_DBG_MODE | |||
| bool history_not_found = 0; | |||
| @@ -875,15 +885,8 @@ void DebugServices::ProcessConvertToHostFormat(const std::vector<std::string> &f | |||
| } | |||
| struct dirent *dir = nullptr; | |||
| while ((dir = readdir(d_handle)) != nullptr) { | |||
| struct stat st; | |||
| std::string name = real_dump_iter_dir + std::string("/") + std::string(dir->d_name); | |||
| int ret = stat(name.c_str(), &st); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "stat error, ret is: " << ret; | |||
| (void)closedir(d_handle); | |||
| return; | |||
| } | |||
| if (S_ISREG(st.st_mode)) { | |||
| if (IsRegFile(name)) { | |||
| std::string candidate = dir->d_name; | |||
| for (const std::string &file_to_find : files_after_convert_in_dir) { | |||
| std::string file_n = file_to_find; | |||
| @@ -991,18 +994,11 @@ void DebugServices::ProcessConvertList(const std::string &prefix_dump_file_name, | |||
| DIR *d = opendir(specific_dump_dir.c_str()); | |||
| struct dirent *dir = nullptr; | |||
| while ((dir = readdir(d)) != nullptr) { | |||
| struct stat st; | |||
| std::string name = specific_dump_dir + std::string("/") + std::string(dir->d_name); | |||
| int ret = stat(name.c_str(), &st); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "stat error, ret is: " << ret; | |||
| (void)closedir(d); | |||
| return; | |||
| } | |||
| if (!(S_ISREG(st.st_mode))) { | |||
| std::string file_name = dir->d_name; | |||
| std::string file_path = specific_dump_dir + std::string("/") + file_name; | |||
| if (!IsRegFile(file_path)) { | |||
| continue; | |||
| } | |||
| std::string file_name = dir->d_name; | |||
| std::string file_name_w_o_perfix = file_name; | |||
| auto type_pos = file_name.find('.'); | |||
| // adding dot to avoid problematic matching in the scope. | |||
| @@ -1018,8 +1014,7 @@ void DebugServices::ProcessConvertList(const std::string &prefix_dump_file_name, | |||
| } else { | |||
| // otherwise, if file matches prefix and already has been converted to host format | |||
| // add to result of converted files. | |||
| std::string found_file = specific_dump_dir + "/" + file_name; | |||
| result_list->insert(found_file); | |||
| result_list->insert(file_path); | |||
| } | |||
| } | |||
| (void)closedir(d); | |||
| @@ -1466,16 +1461,9 @@ void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_nam | |||
| } else { | |||
| struct dirent *dir = nullptr; | |||
| while ((dir = readdir(d)) != nullptr) { | |||
| struct stat st; | |||
| std::string name = abspath + std::string("/") + std::string(dir->d_name); | |||
| int ret = stat(name.c_str(), &st); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "stat error, ret is: " << ret; | |||
| (void)closedir(d); | |||
| return; | |||
| } | |||
| if (S_ISREG(st.st_mode)) { | |||
| std::string file_name = dir->d_name; | |||
| std::string file_name = dir->d_name; | |||
| std::string file_path = abspath + std::string("/") + file_name; | |||
| if (IsRegFile(file_path)) { | |||
| std::string stripped_file_name = GetStrippedFilename(file_name); | |||
| if (stripped_file_name.empty()) { | |||
| continue; | |||
| @@ -1484,8 +1472,7 @@ void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_nam | |||
| if (found != 0) { | |||
| continue; | |||
| } | |||
| std::string full_path = specific_dump_dir + "/" + file_name; | |||
| matched_paths.push_back(full_path); | |||
| matched_paths.push_back(file_path); | |||
| found_file = true; | |||
| } | |||
| } | |||
| @@ -1647,16 +1634,9 @@ void DebugServices::ProcessTensorDataSync(const std::vector<std::tuple<std::stri | |||
| } else { | |||
| struct dirent *dir = nullptr; | |||
| while ((dir = readdir(d)) != nullptr) { | |||
| struct stat st; | |||
| std::string name = specific_dump_dir + std::string("/") + std::string(dir->d_name); | |||
| int ret = stat(name.c_str(), &st); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "stat error, ret is: " << ret; | |||
| (void)closedir(d); | |||
| return; | |||
| } | |||
| if (S_ISREG(st.st_mode)) { | |||
| std::string file_name = dir->d_name; | |||
| std::string file_name = dir->d_name; | |||
| std::string file_path = specific_dump_dir + std::string("/") + file_name; | |||
| if (IsRegFile(file_path)) { | |||
| for (auto &node : proto_to_dump) { | |||
| std::string dump_name = std::get<1>(node); | |||
| std::string stripped_file_name = GetStrippedFilename(file_name); | |||
| @@ -1882,7 +1862,7 @@ std::string GetOnlineOpOverflowDir() { | |||
| return overflow_bin_path; | |||
| } | |||
| void DebugServices::AddOpOverflowOpNames(const std::string overflow_bin_path, std::vector<std::string> *op_names) { | |||
| void DebugServices::AddOpOverflowOpNames(const std::string &overflow_bin_path, std::vector<std::string> *op_names) { | |||
| MS_EXCEPTION_IF_NULL(op_names); | |||
| std::map<std::pair<uint64_t, uint64_t>, std::string> task_stream_to_opname; | |||
| std::vector<std::pair<uint64_t, uint64_t>> task_stream_hit; | |||
| @@ -1896,18 +1876,9 @@ void DebugServices::AddOpOverflowOpNames(const std::string overflow_bin_path, st | |||
| } else { | |||
| struct dirent *dir = nullptr; | |||
| while ((dir = readdir(d)) != nullptr) { | |||
| struct stat st; | |||
| std::string name = overflow_bin_path + std::string("/") + std::string(dir->d_name); | |||
| int ret = stat(name.c_str(), &st); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "stat error, ret is: " << ret; | |||
| (void)closedir(d); | |||
| return; | |||
| } | |||
| if (S_ISREG(st.st_mode)) { | |||
| // form fully qualified filename | |||
| std::string file_path = name; | |||
| std::string file_name = dir->d_name; | |||
| std::string file_name = dir->d_name; | |||
| std::string file_path = overflow_bin_path + std::string("/") + file_name; | |||
| if (IsRegFile(file_path)) { | |||
| // attempt to read the file | |||
| std::ifstream infile; | |||
| infile.open(file_path.c_str(), std::ios::ate | std::ios::binary | std::ios::in); | |||
| @@ -2082,7 +2053,6 @@ bool DebugServices::GetAttrsFromFilename(const std::string &file_name, std::stri | |||
| fourth_dot == std::string::npos) { | |||
| return false; | |||
| } | |||
| // get node_name | |||
| if (first_dot < second_dot) { | |||
| *node_name = file_name.substr(first_dot + 1, second_dot - first_dot - 1); | |||
| @@ -2090,7 +2060,6 @@ bool DebugServices::GetAttrsFromFilename(const std::string &file_name, std::stri | |||
| MS_LOG(ERROR) << "filename parse error to get node_name."; | |||
| return false; | |||
| } | |||
| // get task id | |||
| if (second_dot < third_dot) { | |||
| std::string extracted_task_id = file_name.substr(second_dot + 1, third_dot - second_dot - 1); | |||
| @@ -2104,10 +2073,9 @@ bool DebugServices::GetAttrsFromFilename(const std::string &file_name, std::stri | |||
| return false; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "filename parse error to get task_id."; | |||
| MS_LOG(ERROR) << "Filename <" << file_name << "> parse error to get task_id."; | |||
| return false; | |||
| } | |||
| // get stream id | |||
| if (third_dot < fourth_dot) { | |||
| std::string extracted_stream_id = file_name.substr(third_dot + 1, fourth_dot - third_dot - 1); | |||
| @@ -2121,7 +2089,7 @@ bool DebugServices::GetAttrsFromFilename(const std::string &file_name, std::stri | |||
| return false; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "filename parse error to get stream_id."; | |||
| MS_LOG(ERROR) << "Filename <" << file_name << "> parse error to get stream_id."; | |||
| return false; | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -132,7 +132,7 @@ class DebugServices { | |||
| std::string FindQualifiedTensorName(const std::string &tensor_name, unsigned const int &tensor_device_id, | |||
| unsigned const int &tensor_root_graph_id) const { | |||
| int indx = 0; | |||
| size_t indx = 0; | |||
| for (auto check_node : check_node_list) { | |||
| std::string w_name = std::get<0>(check_node); | |||
| bool w_type = std::get<1>(check_node); | |||
| @@ -204,8 +204,8 @@ class DebugServices { | |||
| struct TensorStat { | |||
| TensorStat(uint64_t data_size, int dtype, const std::vector<int64_t> &shape, bool is_bool, double max_value, | |||
| double min_value, double avg_value, int count, int neg_zero_count, int pos_zero_count, int nan_count, | |||
| int neg_inf_count, int pos_inf_count, int zero_count) | |||
| double min_value, double avg_value, uint64_t count, uint64_t neg_zero_count, uint64_t pos_zero_count, | |||
| uint64_t nan_count, uint64_t neg_inf_count, uint64_t pos_inf_count, uint64_t zero_count) | |||
| : data_size(data_size), | |||
| dtype(dtype), | |||
| shape(shape), | |||
| @@ -230,19 +230,19 @@ class DebugServices { | |||
| double max_value = std::numeric_limits<double>::lowest(); | |||
| double min_value = std::numeric_limits<double>::max(); | |||
| double avg_value = 0.0; | |||
| int count = 0; | |||
| int neg_zero_count = 0; | |||
| int pos_zero_count = 0; | |||
| int nan_count = 0; | |||
| int neg_inf_count = 0; | |||
| int pos_inf_count = 0; | |||
| int zero_count = 0; | |||
| uint64_t count = 0; | |||
| uint64_t neg_zero_count = 0; | |||
| uint64_t pos_zero_count = 0; | |||
| uint64_t nan_count = 0; | |||
| uint64_t neg_inf_count = 0; | |||
| uint64_t pos_inf_count = 0; | |||
| uint64_t zero_count = 0; | |||
| }; | |||
| static TensorStat GetTensorStatistics(const std::shared_ptr<TensorData> &tensor); | |||
| void AddWatchpoint( | |||
| unsigned int id, unsigned int watch_condition, float parameter, | |||
| unsigned int id, int watch_condition, float parameter, | |||
| const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> ¶meter_list, | |||
| const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_device_list = nullptr, | |||
| const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_graph_list = nullptr); | |||
| @@ -263,7 +263,7 @@ class DebugServices { | |||
| const std::vector<parameter_t> ¶meter_list); | |||
| #endif | |||
| const void *PreparePrevTensor(uint32_t *prev_num_elements, const std::string &tensor_name); | |||
| const void *PreparePrevTensor(uint64_t *prev_num_elements, const std::string &tensor_name); | |||
| void CheckHistoryErrorCode(int *error_code, bool history_not_found); | |||
| @@ -279,7 +279,7 @@ class DebugServices { | |||
| std::vector<unsigned int> *device_id, std::vector<unsigned int> *root_graph_id, | |||
| bool error_on_no_value = false); | |||
| void AddOpOverflowOpNames(const std::string overflow_bin_path, std::vector<std::string> *op_names); | |||
| void AddOpOverflowOpNames(const std::string &overflow_bin_path, std::vector<std::string> *op_names); | |||
| void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition, | |||
| std::vector<unsigned int> *const watchpoint_id, | |||
| @@ -363,7 +363,7 @@ class DebugServices { | |||
| bool error_on_no_value = false); | |||
| const void *GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed, | |||
| uint32_t *prev_num_elements, bool *history_not_found); | |||
| uint64_t *prev_num_elements, bool *history_not_found); | |||
| void ReadTensorFromNpy(const std::string &tensor_name, const std::string &file_name, std::string *const tensor_type, | |||
| std::size_t *const size, std::vector<int64_t> *const shape, | |||
| @@ -770,7 +770,7 @@ void Debugger::SendHeartbeat(int32_t period) { | |||
| while (enable_heartbeat_) { | |||
| MS_EXCEPTION_IF_NULL(grpc_client_); | |||
| EventReply reply = grpc_client_->SendHeartbeat(heartbeat); | |||
| if (reply.status() != reply.OK) { | |||
| if (reply.status() != EventReply::OK) { | |||
| MS_LOG(ERROR) << "Error: SendHeartbeat failed"; | |||
| num_heartbeat_fail++; | |||
| if (num_heartbeat_fail >= max_num_heartbeat_fail) { | |||
| @@ -801,7 +801,7 @@ void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) { | |||
| // send graph to MindInsight server | |||
| MS_EXCEPTION_IF_NULL(grpc_client_); | |||
| EventReply reply = grpc_client_->SendGraph(graph_proto); | |||
| if (reply.status() != reply.OK) { | |||
| if (reply.status() != EventReply::OK) { | |||
| MS_LOG(ERROR) << "Error: SendGraph failed"; | |||
| } | |||
| // enter command loop, wait and process commands | |||
| @@ -827,7 +827,7 @@ bool Debugger::SendMetadata(bool version_check) { | |||
| EventReply reply_metadata = grpc_client_->SendMetadata(metadata); | |||
| bool ret = false; | |||
| if (reply_metadata.status() == reply_metadata.OK) { | |||
| if (reply_metadata.status() == EventReply::OK) { | |||
| if (version_check) { | |||
| // get type of the command in meta data reply, it should be version matched | |||
| DebuggerCommand cmd = GetCommand(reply_metadata); | |||
| @@ -885,7 +885,7 @@ void Debugger::SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_prot | |||
| } | |||
| } | |||
| EventReply reply = grpc_client_->SendMultiGraphs(chunked_graph_proto_list); | |||
| if (reply.status() != reply.OK) { | |||
| if (reply.status() != EventReply::OK) { | |||
| MS_LOG(ERROR) << "Error: SendGraph failed"; | |||
| } | |||
| // enter command loop, wait and process commands | |||
| @@ -923,7 +923,7 @@ void Debugger::CommandLoop() { | |||
| // wait for command | |||
| MS_EXCEPTION_IF_NULL(grpc_client_); | |||
| EventReply reply = grpc_client_->WaitForCommand(metadata); | |||
| if (reply.status() != reply.OK) { | |||
| if (reply.status() != EventReply::OK) { | |||
| MS_LOG(ERROR) << "Error: WaitForCommand failed"; | |||
| num_wait_fail++; | |||
| if (num_wait_fail > max_num_wait_fail) { | |||
| @@ -1327,7 +1327,7 @@ void Debugger::SendWatchpoints(const std::list<WatchpointHit> &points) { | |||
| if (!points.empty()) { | |||
| MS_EXCEPTION_IF_NULL(grpc_client_); | |||
| EventReply reply = grpc_client_->SendWatchpointHits(points); | |||
| if (reply.status() != reply.OK) { | |||
| if (reply.status() != EventReply::OK) { | |||
| MS_LOG(ERROR) << "Error: SendWatchpointHits failed"; | |||
| } | |||
| } | |||
| @@ -18,6 +18,7 @@ | |||
| #include <algorithm> | |||
| #include <chrono> | |||
| namespace mindspore { | |||
| DbgServices::DbgServices() { debug_services_ = std::make_shared<DebugServices>(); } | |||
| DbgServices::DbgServices(const DbgServices &other) { | |||
| @@ -34,10 +35,7 @@ DbgServices &DbgServices::operator=(const DbgServices &other) { | |||
| } | |||
| #if !defined(__APPLE__) | |||
| DbgServices::~DbgServices() noexcept { | |||
| MS_LOG(INFO) << "cpp DbgServices object is deleted"; | |||
| debug_services_ = nullptr; | |||
| } | |||
| DbgServices::~DbgServices() { ClearData(); } | |||
| #else | |||
| DbgServices::~DbgServices() { | |||
| MS_LOG(INFO) << "cpp DbgServices object is deleted"; | |||
| @@ -45,6 +43,11 @@ DbgServices::~DbgServices() { | |||
| } | |||
| #endif | |||
| void DbgServices::ClearData() noexcept { | |||
| MS_LOG(INFO) << "cpp DbgServices object is deleted"; | |||
| debug_services_ = nullptr; | |||
| } | |||
| std::string DbgServices::GetVersion() const { | |||
| MS_LOG(INFO) << "get version is called"; | |||
| return MSVERSION; | |||
| @@ -74,7 +77,7 @@ int32_t DbgServices::Initialize(const std::string net_name, const std::string du | |||
| } | |||
| int32_t DbgServices::AddWatchpoint( | |||
| unsigned int id, unsigned int watch_condition, | |||
| unsigned int id, int watch_condition, | |||
| std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes, | |||
| std::vector<parameter_t> parameter_list) { | |||
| MS_EXCEPTION_IF_NULL(debug_services_); | |||
| @@ -93,7 +96,7 @@ int32_t DbgServices::AddWatchpoint( | |||
| std::vector<std::uint32_t> rank_id; | |||
| (void)std::transform( | |||
| rank_id_str.begin(), rank_id_str.end(), std::back_inserter(rank_id), | |||
| [](std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); }); | |||
| [](const std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); }); | |||
| MS_LOG(DEBUG) << "cpp DbgServices AddWatchpoint rank_id: "; | |||
| for (auto const &i : rank_id) { | |||
| MS_LOG(DEBUG) << i << " "; | |||
| @@ -103,7 +106,7 @@ int32_t DbgServices::AddWatchpoint( | |||
| std::vector<std::uint32_t> root_graph_id; | |||
| (void)std::transform( | |||
| root_graph_id_str.begin(), root_graph_id_str.end(), std::back_inserter(root_graph_id), | |||
| [](std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); }); | |||
| [](const std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); }); | |||
| MS_LOG(DEBUG) << "cpp DbgServices AddWatchpoint root_graph_id: "; | |||
| for (auto const &j : root_graph_id) { | |||
| MS_LOG(DEBUG) << j << " "; | |||
| @@ -293,7 +296,8 @@ std::vector<tensor_data_t> DbgServices::ReadTensors(const std::vector<tensor_inf | |||
| result_list = ReadTensorsUtil(info); | |||
| for (auto result : result_list) { | |||
| MS_EXCEPTION_IF_NULL(result); | |||
| tensor_data_t tensor_data_item(result->GetDataPtr(), result->GetByteSize(), result->GetType(), result->GetShape()); | |||
| tensor_data_t tensor_data_item(result->GetDataPtr(), result->GetByteSize(), static_cast<int>(result->GetType()), | |||
| result->GetShape()); | |||
| tensors_read.push_back(tensor_data_item); | |||
| } | |||
| return tensors_read; | |||
| @@ -310,7 +314,7 @@ std::vector<TensorBaseData> DbgServices::ReadTensorsBase(const std::vector<tenso | |||
| tensors_read_base.push_back(tensor_data_item); | |||
| continue; | |||
| } | |||
| TensorBaseData tensor_data_item(result->GetByteSize(), result->GetType(), result->GetShape()); | |||
| TensorBaseData tensor_data_item(result->GetByteSize(), static_cast<int>(result->GetType()), result->GetShape()); | |||
| tensors_read_base.push_back(tensor_data_item); | |||
| } | |||
| return tensors_read_base; | |||
| @@ -346,3 +350,4 @@ std::vector<TensorStatData> DbgServices::ReadTensorsStat(const std::vector<tenso | |||
| return tensors_read_stat; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -32,6 +32,7 @@ | |||
| namespace py = pybind11; | |||
| namespace common = mindspore::common; | |||
| namespace mindspore { | |||
| struct parameter_t { | |||
| parameter_t(const std::string &name, bool disabled, double value, bool hit, double actual_value) | |||
| : name(name), disabled(disabled), value(value), hit(hit), actual_value(actual_value) {} | |||
| @@ -157,13 +158,13 @@ struct TensorStatData { | |||
| const double max_value() const { return max_value_; } | |||
| const double min_value() const { return min_value_; } | |||
| const double avg_value() const { return avg_value_; } | |||
| const int count() const { return count_; } | |||
| const int neg_zero_count() const { return neg_zero_count_; } | |||
| const int pos_zero_count() const { return pos_zero_count_; } | |||
| const int nan_count() const { return nan_count_; } | |||
| const int neg_inf_count() const { return neg_inf_count_; } | |||
| const int pos_inf_count() const { return pos_inf_count_; } | |||
| const int zero_count() const { return zero_count_; } | |||
| const uint64_t count() const { return count_; } | |||
| const uint64_t neg_zero_count() const { return neg_zero_count_; } | |||
| const uint64_t pos_zero_count() const { return pos_zero_count_; } | |||
| const uint64_t nan_count() const { return nan_count_; } | |||
| const uint64_t neg_inf_count() const { return neg_inf_count_; } | |||
| const uint64_t pos_inf_count() const { return pos_inf_count_; } | |||
| const uint64_t zero_count() const { return zero_count_; } | |||
| uint64_t data_size_; | |||
| int dtype_; | |||
| @@ -172,13 +173,13 @@ struct TensorStatData { | |||
| double max_value_; | |||
| double min_value_; | |||
| double avg_value_; | |||
| int count_; | |||
| int neg_zero_count_; | |||
| int pos_zero_count_; | |||
| int nan_count_; | |||
| int neg_inf_count_; | |||
| int pos_inf_count_; | |||
| int zero_count_; | |||
| uint64_t count_; | |||
| uint64_t neg_zero_count_; | |||
| uint64_t pos_zero_count_; | |||
| uint64_t nan_count_; | |||
| uint64_t neg_inf_count_; | |||
| uint64_t pos_inf_count_; | |||
| uint64_t zero_count_; | |||
| }; | |||
| class DbgServices { | |||
| @@ -195,7 +196,7 @@ class DbgServices { | |||
| uint64_t max_mem_usage); | |||
| int32_t AddWatchpoint( | |||
| unsigned int id, unsigned int watch_condition, | |||
| unsigned int id, int watch_condition, | |||
| std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes, | |||
| std::vector<parameter_t> parameter_list); | |||
| @@ -215,6 +216,7 @@ class DbgServices { | |||
| private: | |||
| std::shared_ptr<DebugServices> debug_services_ = nullptr; | |||
| void ClearData() noexcept; | |||
| }; | |||
| } // namespace mindspore | |||
| #endif // DEBUG_DBG_SERVICES_H_ | |||
| @@ -18,6 +18,7 @@ | |||
| #include "pybind11/stl_bind.h" | |||
| #include "debugger/offline_debug/dbg_services.h" | |||
| namespace mindspore { | |||
| PYBIND11_MODULE(_mindspore_offline_debug, m) { | |||
| m.doc() = "pybind11 debug services api"; | |||
| (void)py::class_<DbgServices>(m, "DbgServices") | |||
| @@ -90,3 +91,4 @@ PYBIND11_MODULE(_mindspore_offline_debug, m) { | |||
| .def("pos_inf_count", &TensorStatData::pos_inf_count) | |||
| .def("zero_count", &TensorStatData::zero_count); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -514,7 +514,7 @@ void DebuggerProtoExporter::ExportValueNodes(const std::map<AnfNodePtr, size_t> | |||
| } | |||
| } | |||
| void DebuggerProtoExporter::InitModelInfo() { model_.set_ir_version(debugger::IR_VERSION); } | |||
| void DebuggerProtoExporter::InitModelInfo() { model_.set_ir_version(static_cast<int64_t>(debugger::IR_VERSION)); } | |||
| debugger::ModelProto GetDebuggerFuncGraphProto(const FuncGraphPtr &func_graph) { | |||
| DebuggerProtoExporter exporter; | |||
| @@ -92,7 +92,7 @@ double VarianceAndMeanCalculator::GetStandardDeviation() { return sqrt(GetVarian | |||
| template <typename T> | |||
| TensorSummary<T>::TensorSummary(const void *current_tensor_ptr, const void *const previous_tensor_ptr, | |||
| uint32_t num_elements, uint32_t prev_num_elements) | |||
| uint64_t num_elements, uint64_t prev_num_elements) | |||
| : current_tensor_ptr_(reinterpret_cast<const T *>(current_tensor_ptr)), | |||
| prev_tensor_ptr_(reinterpret_cast<const T *>(previous_tensor_ptr)), | |||
| num_elements_(num_elements), | |||
| @@ -105,13 +105,13 @@ class ITensorSummary { | |||
| virtual const double max_value() const = 0; | |||
| virtual const double min_value() const = 0; | |||
| virtual const double avg_value() const = 0; | |||
| virtual const int count() const = 0; | |||
| virtual const int neg_zero_count() const = 0; | |||
| virtual const int pos_zero_count() const = 0; | |||
| virtual const int nan_count() const = 0; | |||
| virtual const int neg_inf_count() const = 0; | |||
| virtual const int pos_inf_count() const = 0; | |||
| virtual const int zero_count() const = 0; | |||
| virtual const uint64_t count() const = 0; | |||
| virtual const uint64_t neg_zero_count() const = 0; | |||
| virtual const uint64_t pos_zero_count() const = 0; | |||
| virtual const uint64_t nan_count() const = 0; | |||
| virtual const uint64_t neg_inf_count() const = 0; | |||
| virtual const uint64_t pos_inf_count() const = 0; | |||
| virtual const uint64_t zero_count() const = 0; | |||
| }; | |||
| template <typename T> | |||
| @@ -119,7 +119,7 @@ class TensorSummary : public ITensorSummary { | |||
| public: | |||
| TensorSummary() = default; | |||
| ~TensorSummary() override = default; | |||
| TensorSummary(const void *, const void *, uint32_t, uint32_t); | |||
| TensorSummary(const void *, const void *, uint64_t, uint64_t); | |||
| void SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &) override; | |||
| // returns hit, error_code, parameter_list | |||
| std::tuple<bool, int, std::vector<DebugServices::parameter_t>> IsWatchpointHit(DebugServices::watchpoint_t) override; | |||
| @@ -128,30 +128,30 @@ class TensorSummary : public ITensorSummary { | |||
| const double max_value() const override { return max_; } | |||
| const double min_value() const override { return min_; } | |||
| const double avg_value() const override { return avg_; } | |||
| const int count() const override { return num_elements_; } | |||
| const int neg_zero_count() const override { return neg_zero_count_; } | |||
| const int pos_zero_count() const override { return pos_zero_count_; } | |||
| const int nan_count() const override { return nan_count_; } | |||
| const int neg_inf_count() const override { return neg_inf_count_; } | |||
| const int pos_inf_count() const override { return pos_inf_count_; } | |||
| const int zero_count() const override { return zero_count_; } | |||
| const uint64_t count() const override { return num_elements_; } | |||
| const uint64_t neg_zero_count() const override { return neg_zero_count_; } | |||
| const uint64_t pos_zero_count() const override { return pos_zero_count_; } | |||
| const uint64_t nan_count() const override { return nan_count_; } | |||
| const uint64_t neg_inf_count() const override { return neg_inf_count_; } | |||
| const uint64_t pos_inf_count() const override { return pos_inf_count_; } | |||
| const uint64_t zero_count() const override { return zero_count_; } | |||
| private: | |||
| const T *current_tensor_ptr_; | |||
| const T *prev_tensor_ptr_; | |||
| uint32_t num_elements_; | |||
| uint32_t prev_num_elements_; | |||
| uint64_t num_elements_; | |||
| uint64_t prev_num_elements_; | |||
| double min_; | |||
| double max_; | |||
| double avg_; | |||
| bool is_bool_; | |||
| uint32_t neg_zero_count_; | |||
| uint32_t pos_zero_count_; | |||
| uint32_t pos_inf_count_; | |||
| uint32_t neg_inf_count_; | |||
| uint32_t inf_count_; | |||
| uint32_t nan_count_; | |||
| uint32_t zero_count_; | |||
| uint64_t neg_zero_count_; | |||
| uint64_t pos_zero_count_; | |||
| uint64_t pos_inf_count_; | |||
| uint64_t neg_inf_count_; | |||
| uint64_t inf_count_; | |||
| uint64_t nan_count_; | |||
| uint64_t zero_count_; | |||
| double epsilon_; | |||
| bool mean_sd_cal_enabled_; | |||
| VarianceAndMeanCalculator current_mean_variance_; | |||
| @@ -125,7 +125,7 @@ void CheckIfValidType(const TypePtr &type) { | |||
| } | |||
| } | |||
| void SetTensorType(const TypePtr &type, const BaseShapePtr &shape, irpb::TypeProto *type_proto) { | |||
| void SetTensorType(const TypePtr &type, const BaseShapePtr &shape, irpb::TypeProto *const type_proto) { | |||
| TypePtr elem_type = dyn_cast<TensorType>(type)->element(); | |||
| type_proto->mutable_tensor_type()->set_elem_type(GetNumberDataType(elem_type)); | |||
| type_proto->set_data_type(irpb::DT_TENSOR); | |||
| @@ -189,7 +189,7 @@ void ProtoExporter::SetNodeOutputType(const AnfNodePtr &node, irpb::TypeProto *t | |||
| SetNodeOutputType(node->Type(), node->Shape(), type_proto); | |||
| } | |||
| void ProtoExporter::SetValueToProtoBasicTypes(const ValuePtr &val, irpb::ValueProto *value_proto) { | |||
| void ProtoExporter::SetValueToProtoBasicTypes(const ValuePtr &val, irpb::ValueProto *const value_proto) { | |||
| if (val->isa<StringImm>()) { | |||
| const StringImmPtr &value = dyn_cast<StringImm>(val); | |||
| value_proto->set_dtype(irpb::DT_STRING); | |||
| @@ -580,7 +580,7 @@ void ProtoExporter::ExportValueNodes(const std::map<AnfNodePtr, size_t> &const_m | |||
| } | |||
| } | |||
| void ProtoExporter::InitModelInfo() { model_.set_ir_version(irpb::IR_VERSION); } | |||
| void ProtoExporter::InitModelInfo() { model_.set_ir_version(static_cast<int64_t>(irpb::IR_VERSION)); } | |||
| std::string GetFuncGraphProtoString(const FuncGraphPtr &func_graph) { | |||
| ProtoExporter exporter; | |||
| @@ -28,7 +28,7 @@ enum RdrModes : int { Exceptional = 1, Normal = 2 }; | |||
| class EnvConfigParser { | |||
| public: | |||
| static EnvConfigParser &GetInstance() { | |||
| static EnvConfigParser instance; | |||
| static EnvConfigParser instance = EnvConfigParser(); | |||
| instance.Parse(); | |||
| return instance; | |||
| } | |||
| @@ -42,7 +42,7 @@ class EnvConfigParser { | |||
| int RdrMode() const { return rdr_mode_; } | |||
| std::string RdrPath() const { return rdr_path_; } | |||
| #endif | |||
| bool GetSysMemreuse() { return sys_memreuse_; } | |||
| bool GetSysMemreuse() const { return sys_memreuse_; } | |||
| void SetSysMemreuse(bool set_memreuse) { sys_memreuse_ = set_memreuse; } | |||
| private: | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * Copyright 2019-2022 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -19,7 +19,6 @@ | |||
| #include <algorithm> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <cstring> | |||
| #include <iostream> | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #ifdef ONLINE_DBG_MODE | |||
| @@ -162,7 +161,6 @@ class TensorData { | |||
| this->name_ = obj.name_; | |||
| this->execution_order_ = obj.execution_order_; | |||
| this->slot_ = obj.slot_; | |||
| this->data_ptr_ = obj.data_ptr_; | |||
| this->size_ = obj.size_; | |||
| this->data_type_ = obj.data_type_; | |||
| this->data_type_size_ = obj.data_type_size_; | |||
| @@ -177,18 +175,19 @@ class TensorData { | |||
| #endif | |||
| } | |||
| #ifdef OFFLINE_DBG_MODE | |||
| ~TensorData() { DeleteDataPtr(); } | |||
| #else | |||
| ~TensorData() {} | |||
| #endif | |||
| void DeleteDataPtr() { | |||
| if (this->data_ptr_ != NULL) { | |||
| void DeleteDataPtr() noexcept { | |||
| #ifdef ONLINE_DBG_MODE | |||
| this->tensor_ptr_ = nullptr; | |||
| this->data_ptr_ = nullptr; | |||
| #else | |||
| if (this->data_ptr_ != nullptr) { | |||
| delete this->data_ptr_; | |||
| this->data_ptr_ = NULL; | |||
| this->data_ptr_ = nullptr; | |||
| this->size_ = 0; | |||
| } | |||
| #endif | |||
| } | |||
| std::string GetName() const { return this->name_; } | |||
| @@ -206,7 +205,7 @@ class TensorData { | |||
| void SetTimeStamp(const std::string &time_stamp) { this->time_stamp_ = time_stamp; } | |||
| #ifdef ONLINE_DBG_MODE | |||
| void SetTensor(mindspore::tensor::TensorPtr out_tensor) { this->tensor_ptr_ = out_tensor; } | |||
| void SetTensor(const mindspore::tensor::TensorPtr &out_tensor) { this->tensor_ptr_ = out_tensor; } | |||
| #endif | |||
| void SetSlot(size_t slot) { this->slot_ = slot; } | |||
| @@ -215,7 +214,7 @@ class TensorData { | |||
| void SetDataPtr(char *data_ptr) { this->data_ptr_ = data_ptr; } | |||
| uint32_t GetNumElements() { return size_ / data_type_size_; } | |||
| uint64_t GetNumElements() const { return size_ / data_type_size_; } | |||
| uint64_t GetByteSize() const { return this->size_; } | |||
| @@ -223,7 +222,7 @@ class TensorData { | |||
| std::vector<int64_t> GetShape() const { return this->shape_; } | |||
| void SetShape(std::vector<int64_t> shape) { this->shape_ = shape; } | |||
| void SetShape(const std::vector<int64_t> &shape) { this->shape_ = shape; } | |||
| unsigned int GetIteration() const { return this->iteration_; } | |||
| @@ -245,7 +244,7 @@ class TensorData { | |||
| void SetType(unsigned int type) { ConvertMsToDbgType(type); } | |||
| void SetType(std::string type_name) { ConvertStringToDbgType(type_name); } | |||
| void SetType(const std::string &type_name) { ConvertStringToDbgType(type_name); } | |||
| bool GetIsOutput() const { return this->is_output_; } | |||
| @@ -40,7 +40,7 @@ class TensorLoader { | |||
| ~TensorLoader() { EmptyTensor(); } | |||
| void MoveTensorCurrentToPrev(std::string tensor_name) { | |||
| void MoveTensorCurrentToPrev(const std::string &tensor_name) { | |||
| auto handle = tensor_list_map_.extract(tensor_name); | |||
| if (!handle.empty()) { | |||
| MS_LOG(INFO) << "Moving " << tensor_name << " from current map to previous map"; | |||
| @@ -50,12 +50,14 @@ class TensorLoader { | |||
| void SwapCurrentPrev() { tensor_list_map_.swap(prev_tensor_list_map_); } | |||
| bool TensorExistsInCurrent(std::string tensor_name) const { | |||
| bool TensorExistsInCurrent(const std::string &tensor_name) const { | |||
| return tensor_list_map_.find(tensor_name) != tensor_list_map_.end(); | |||
| } | |||
| // only parameters will return true | |||
| bool PrevTensorExistsInCurrent(std::string tensor_name) const { return TensorExistsInCurrent(tensor_name + ":prev"); } | |||
| bool PrevTensorExistsInCurrent(const std::string &tensor_name) const { | |||
| return TensorExistsInCurrent(tensor_name + ":prev"); | |||
| } | |||
| void MoveParametersCurrentToPrev() { | |||
| MS_LOG(INFO) << "Moving parameters from current map to previous map"; | |||
| @@ -85,7 +87,7 @@ class TensorLoader { | |||
| * Description: Load new tensor into tensor_list_map_ (debugger backend cache). In offline debugger, add ":prev" to | |||
| * the previous tensor's name to avoid segfault caused by wrongly evicting the tensor when memory limit is enabled. | |||
| */ | |||
| bool LoadNewTensor(std::shared_ptr<TensorData> tensor, bool keep_prev) { | |||
| bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev) { | |||
| lock_.lock(); | |||
| auto tensor_name = tensor->GetName(); | |||
| if (keep_prev) { | |||
| @@ -98,8 +100,9 @@ class TensorLoader { | |||
| } | |||
| std::string key_name = tensor_name; | |||
| #ifdef OFFLINE_DBG_MODE | |||
| std::string output_type = tensor->GetIsOutput() ? "1" : "0"; | |||
| key_name += (":" + std::to_string(tensor->GetDeviceId()) + ":" + std::to_string(tensor->GetRootGraphId()) + ":" + | |||
| std::to_string(tensor->GetIsOutput()) + ":" + std::to_string(tensor->GetSlot())); | |||
| output_type + ":" + std::to_string(tensor->GetSlot())); | |||
| if (tensor_list_map_.find(key_name) != tensor_list_map_.end() && | |||
| tensor->GetIteration() == tensor_list_map_[key_name]->GetPrevIteration()) { | |||
| key_name += ":prev"; | |||
| @@ -151,7 +154,7 @@ class TensorLoader { | |||
| } | |||
| } | |||
| void EmptyTensor() { | |||
| void EmptyTensor() noexcept { | |||
| std::lock_guard<std::mutex> lg(lock_); | |||
| prev_tensor_list_map_.clear(); | |||
| tensor_list_map_.swap(prev_tensor_list_map_); | |||
| @@ -159,7 +162,7 @@ class TensorLoader { | |||
| void EmptyCurrentTensor() { tensor_list_map_.clear(); } | |||
| bool EnableMemoryControl() { return mem_total_ > 0; } | |||
| bool EnableMemoryControl() const { return mem_total_ > 0; } | |||
| /* | |||
| * Feature group: Offline debugger. | |||
| @@ -210,7 +213,7 @@ class TensorLoader { | |||
| std::unique_lock<std::mutex> lk(mem_lock_); | |||
| while (data_size > mem_total_ - mem_usage_) { | |||
| // wait until there is any not-in-use candidate to be evicted from cache | |||
| evict_cond.wait(lk, [&] { return !cache_evict_queue_.empty(); }); | |||
| evict_cond.wait(lk, [this] { return !cache_evict_queue_.empty(); }); | |||
| candidate_name = cache_evict_queue_.front(); | |||
| cache_evict_queue_.pop_front(); | |||
| // evict candidate tensor | |||
| @@ -16,7 +16,6 @@ | |||
| #include "utils/summary/event_writer.h" | |||
| #include <string> | |||
| #include <memory> | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/convert_utils.h" | |||
| @@ -37,7 +36,9 @@ EventWriter::EventWriter(const std::string &file_full_name) : filename_(file_ful | |||
| status_ = true; | |||
| } | |||
| EventWriter::~EventWriter() { | |||
| EventWriter::~EventWriter() { CloseFile(); } | |||
| void EventWriter::CloseFile() noexcept { | |||
| if (event_file_ != nullptr) { | |||
| bool result = Close(); | |||
| if (!result) { | |||
| @@ -72,6 +72,7 @@ class EventWriter { | |||
| bool WriteRecord(const std::string &data); | |||
| private: | |||
| void CloseFile() noexcept; | |||
| // True: valid / False: closed | |||
| bool status_ = false; | |||
| std::shared_ptr<FileSystem> fs_; | |||