Authors: John Tzanakakis, Adel Shafiei, Amir Lashkari, Islam Aminpull/13009/head
| @@ -63,6 +63,16 @@ install( | |||
| COMPONENT mindspore | |||
| ) | |||
| if(CMAKE_SYSTEM_NAME MATCHES "Windows") | |||
| message("offline debugger does not support windows system temporarily") | |||
| else() | |||
| install( | |||
| TARGETS _mindspore_offline_debug | |||
| DESTINATION ${INSTALL_BASE_DIR} | |||
| COMPONENT mindspore | |||
| ) | |||
| endif() | |||
| install( | |||
| TARGETS mindspore_shared_lib | |||
| DESTINATION ${INSTALL_LIB_DIR} | |||
| @@ -317,6 +327,18 @@ if(EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset) | |||
| ) | |||
| endif() | |||
| if(CMAKE_SYSTEM_NAME MATCHES "Windows") | |||
| message("offline debugger does not support windows system temporarily") | |||
| else() | |||
| if(EXISTS ${CMAKE_SOURCE_DIR}/mindspore/offline_debug) | |||
| install( | |||
| DIRECTORY ${CMAKE_SOURCE_DIR}/mindspore/offline_debug | |||
| DESTINATION ${INSTALL_PY_DIR} | |||
| COMPONENT mindspore | |||
| ) | |||
| endif() | |||
| endif() | |||
| ## Public header files | |||
| install( | |||
| DIRECTORY ${CMAKE_SOURCE_DIR}/include | |||
| @@ -1,3 +1,6 @@ | |||
| include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/debug/) | |||
| include_directories(${CMAKE_BINARY_DIR}) | |||
| set(_DEBUG_SRC_LIST | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_dump.cc" | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_utils.cc" | |||
| @@ -8,6 +11,14 @@ set(_DEBUG_SRC_LIST | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/env_config_parser.cc" | |||
| ) | |||
| set(_OFFLINE_SRC_LIST | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/debug_services.cc" | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/debugger/tensor_summary.cc" | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/debugger/offline_debug/offline_logger.cc" | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/debugger/offline_debug/dbg_services.cc" | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/debugger/offline_debug/mi_pybind_register.cc" | |||
| ) | |||
| if(ENABLE_DUMP_IR) | |||
| file(GLOB_RECURSE _RDR_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "rdr/*.cc") | |||
| if(NOT ENABLE_D) | |||
| @@ -38,3 +49,13 @@ endif() | |||
| set_property(SOURCE ${_DEBUG_SRC_LIST} ${_RDR_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | |||
| SUBMODULE_ID=mindspore::SubModuleId::SM_DEBUG) | |||
| add_library(_mindspore_debug_obj OBJECT ${_DEBUG_SRC_LIST} ${_RDR_SRC_LIST}) | |||
| if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows") | |||
| add_compile_options(-Wall -DOFFLINE_DBG_MODE -fPIC -O2) | |||
| set_property(SOURCE ${_OFFLINE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | |||
| SUBMODULE_ID=mindspore::SubModuleId::SM_OFFLINE_DEBUG) | |||
| add_library(_mindspore_offline_debug SHARED ${_OFFLINE_SRC_LIST}) | |||
| set_target_properties(_mindspore_offline_debug PROPERTIES | |||
| PREFIX "${PYTHON_MODULE_PREFIX}" | |||
| SUFFIX "${PYTHON_MODULE_EXTENSION}" | |||
| ) | |||
| endif() | |||
| @@ -13,14 +13,19 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "debug/debug_services.h" | |||
| #include <dirent.h> | |||
| #include <fstream> | |||
| #include <algorithm> | |||
| #include <map> | |||
| #include <unordered_set> | |||
| #ifdef ONLINE_DBG_MODE | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "debug/debug_services.h" | |||
| #endif | |||
| #include "debug/debugger/tensor_summary.h" | |||
| #ifdef ONLINE_DBG_MODE | |||
| namespace mindspore { | |||
| #endif | |||
| DebugServices::DebugServices() { | |||
| tensor_loader_ = new TensorLoader(); | |||
| uint32_t iter_num = -1; | |||
| @@ -42,9 +47,11 @@ DebugServices &DebugServices::operator=(const DebugServices &other) { | |||
| DebugServices::~DebugServices() { delete tensor_loader_; } | |||
| void DebugServices::AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter, | |||
| const std::vector<std::tuple<std::string, bool>> &check_node_list, | |||
| const std::vector<parameter_t> ¶meter_list) { | |||
| void DebugServices::AddWatchpoint( | |||
| unsigned int id, unsigned int watch_condition, float parameter, | |||
| const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> ¶meter_list, | |||
| const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_device_list, | |||
| const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_graph_list) { | |||
| std::lock_guard<std::mutex> lg(lock_); | |||
| watchpoint_t watchpoint_item; | |||
| @@ -52,6 +59,12 @@ void DebugServices::AddWatchpoint(unsigned int id, unsigned int watch_condition, | |||
| watchpoint_item.condition.type = static_cast<CONDITION_TYPE>(watch_condition); | |||
| watchpoint_item.condition.parameter = parameter; | |||
| watchpoint_item.check_node_list = check_node_list; | |||
| if (check_node_device_list != nullptr) { | |||
| watchpoint_item.check_node_device_list = *check_node_device_list; | |||
| } | |||
| if (check_node_graph_list != nullptr) { | |||
| watchpoint_item.check_node_graph_list = *check_node_graph_list; | |||
| } | |||
| watchpoint_item.parameter_list = parameter_list; | |||
| watchpoint_table[id] = watchpoint_item; | |||
| } | |||
| @@ -61,122 +74,170 @@ void DebugServices::RemoveWatchpoint(unsigned int id) { | |||
| watchpoint_table.erase(id); | |||
| } | |||
| std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData> &tensor, void *previous_tensor_ptr, | |||
| uint32_t num_elements, int tensor_dtype) { | |||
| switch (tensor_dtype) { | |||
| case DbgDataType::DT_UINT8: { | |||
| return std::make_unique<TensorSummary<uint8_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_INT8: { | |||
| return std::make_unique<TensorSummary<int8_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_UINT16: { | |||
| return std::make_unique<TensorSummary<uint16_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_INT16: { | |||
| return std::make_unique<TensorSummary<int16_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_UINT32: { | |||
| return std::make_unique<TensorSummary<uint32_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_INT32: | |||
| case DbgDataType::DT_BASE_INT: { | |||
| return std::make_unique<TensorSummary<int32_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_UINT64: { | |||
| return std::make_unique<TensorSummary<uint64_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_INT64: { | |||
| return std::make_unique<TensorSummary<int64_t>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_FLOAT16: { | |||
| return std::make_unique<TensorSummary<float16>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_FLOAT32: | |||
| case DbgDataType::DT_BASE_FLOAT: { | |||
| return std::make_unique<TensorSummary<float>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_FLOAT64: { | |||
| return std::make_unique<TensorSummary<double>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| case DbgDataType::DT_BOOL: { | |||
| return std::make_unique<TensorSummary<bool>>(tensor->GetDataPtr(), previous_tensor_ptr, num_elements); | |||
| } | |||
| default: | |||
| MS_LOG(INFO) << "Unsupported tensor type"; | |||
| // return a null pointer | |||
| return std::unique_ptr<TensorSummary<int32_t>>{}; | |||
| } | |||
| } | |||
| #ifdef OFFLINE_DBG_MODE | |||
| void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed) { | |||
| void *previous_tensor_ptr = nullptr; | |||
| std::shared_ptr<TensorData> tensor_prev; | |||
| if (previous_iter_tensor_needed && tensor->GetIteration() > 1) { | |||
| // read data in offline mode | |||
| std::vector<std::shared_ptr<TensorData>> result_list_prev; | |||
| ReadDumpedTensor(std::vector<std::string>{tensor->GetName()}, std::vector<size_t>{tensor->GetSlot()}, | |||
| std::vector<unsigned int>{tensor->GetDeviceId()}, | |||
| std::vector<unsigned int>{tensor->GetIteration() - 1}, | |||
| std::vector<unsigned int>{tensor->GetRootGraphId()}, &result_list_prev); | |||
| tensor_prev = result_list_prev[0]; | |||
| if (!tensor_prev->GetByteSize()) { | |||
| tensor_prev.reset(); | |||
| } else { | |||
| previous_tensor_ptr = tensor_prev->GetDataPtr(); | |||
| } | |||
| } | |||
| return previous_tensor_ptr; | |||
| } | |||
| #endif | |||
| void DebugServices::AddWatchPointsToCheck(bool init_dbg_suspend, bool step_end, bool recheck, | |||
| const std::string &tensor_name, const std::string &tensor_name_no_slot, | |||
| bool *previous_iter_tensor_needed, std::string *qualified_tensor_name, | |||
| std::vector<watchpoint_t> *watchpoints_to_check) { | |||
| for (auto w_table_item : watchpoint_table) { | |||
| auto wp = std::get<1>(w_table_item); | |||
| // check ONLY init conditions on initial suspended state. | |||
| // skip other conditions on initial suspended state | |||
| if (init_dbg_suspend && (wp.condition.type != INIT)) continue; | |||
| // skip init condition if not init suspend | |||
| if ((wp.condition.type == INIT) && !init_dbg_suspend) continue; | |||
| // check change conditions only on step end. | |||
| if (wp.change_condition() && !step_end) continue; | |||
| // if recheck, ignore the cache results and reanalyze everything. | |||
| // if not a recheck, check only unanalyzed tensors | |||
| if (!recheck && wp_id_cache[tensor_name].count(wp.id)) continue; | |||
| std::string found = wp.FindQualifiedTensorName(tensor_name_no_slot); | |||
| if (!found.empty()) { | |||
| *qualified_tensor_name = found; | |||
| watchpoints_to_check->push_back(w_table_item.second); | |||
| #ifdef OFFLINE_DBG_MODE | |||
| if (wp.change_condition()) { | |||
| *previous_iter_tensor_needed = true; | |||
| } | |||
| #endif | |||
| } | |||
| } | |||
| } | |||
| void DebugServices::AddAnalyzedTensorToCache(const bool recheck, const unsigned int id, | |||
| const std::string &tensor_name) { | |||
| // add analyzed tensor to cache | |||
| if (!recheck) { | |||
| wp_id_cache[tensor_name].insert(id); | |||
| } | |||
| } | |||
| void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, | |||
| std::vector<int> *condition, std::vector<unsigned int> *watchpoint_id, | |||
| std::vector<std::vector<parameter_t>> *parameters, | |||
| std::vector<int32_t> *error_codes, const std::vector<std::string> &op_overflows, | |||
| const std::vector<std::shared_ptr<TensorData>> &tensor_list, | |||
| const bool init_dbg_suspend, const bool step_end, const bool recheck) { | |||
| std::vector<std::shared_ptr<TensorData>> *tensor_list, const bool init_dbg_suspend, | |||
| const bool step_end, const bool recheck, std::vector<unsigned int> *device_id, | |||
| std::vector<unsigned int> *root_graph_id) { | |||
| std::lock_guard<std::mutex> lg(lock_); | |||
| if (watchpoint_table.empty()) return; | |||
| for (const auto &tensor : tensor_list) { | |||
| for (auto &tensor : *tensor_list) { | |||
| #ifdef OFFLINE_DBG_MODE | |||
| // read data in offline mode | |||
| std::vector<std::shared_ptr<TensorData>> result_list; | |||
| ReadDumpedTensor(std::vector<std::string>{tensor->GetName()}, std::vector<size_t>{tensor->GetSlot()}, | |||
| std::vector<unsigned int>{tensor->GetDeviceId()}, | |||
| std::vector<unsigned int>{tensor->GetIteration()}, | |||
| std::vector<unsigned int>{tensor->GetRootGraphId()}, &result_list); | |||
| tensor = result_list[0]; | |||
| if (!tensor->GetByteSize()) { | |||
| tensor.reset(); | |||
| continue; | |||
| } | |||
| #endif | |||
| const auto tensor_name = tensor->GetName(); | |||
| const auto tensor_name_no_slot = tensor_name.substr(0, tensor_name.find_first_of(':')); | |||
| const auto tensor_slot = std::to_string(tensor->GetSlot()); | |||
| mindspore::tensor::TensorPtr tensor_ptr = tensor->GetTensor(); | |||
| // no elements to analyze | |||
| if (tensor_ptr->DataSize() == 0) continue; | |||
| int tensor_dtype = tensor_ptr->data_type_c(); | |||
| if (tensor->GetByteSize() == 0) continue; | |||
| int tensor_dtype = tensor->GetType(); | |||
| std::vector<watchpoint_t> watchpoints_to_check; | |||
| std::string qualified_tensor_name; | |||
| for (auto w_table_item : watchpoint_table) { | |||
| auto wp = std::get<1>(w_table_item); | |||
| // check ONLY init conditions on intial suspended state. | |||
| // skip other conditions on intial suspended state | |||
| if (init_dbg_suspend && (wp.condition.type != INIT)) continue; | |||
| // skip init condition if not init suspend | |||
| if ((wp.condition.type == INIT) && !init_dbg_suspend) continue; | |||
| // check change conditions only on step end. | |||
| if (wp.change_condition() && !step_end) continue; | |||
| // if recheck, ignore the cache results and reanalyze everything. | |||
| // if not a recheck, check only unanalyzed tensors | |||
| if (!recheck && wp_id_cache[tensor_name].count(wp.id)) continue; | |||
| std::string found = wp.FindQualifiedTensorName(tensor_name_no_slot); | |||
| if (!found.empty()) { | |||
| qualified_tensor_name = found; | |||
| watchpoints_to_check.push_back(w_table_item.second); | |||
| } | |||
| } | |||
| bool previous_iter_tensor_needed = false; | |||
| // Add do nothing line in case offline debug is off, prevent unused var warning | |||
| (void)previous_iter_tensor_needed; | |||
| AddWatchPointsToCheck(init_dbg_suspend, step_end, recheck, tensor_name, tensor_name_no_slot, | |||
| &previous_iter_tensor_needed, &qualified_tensor_name, &watchpoints_to_check); | |||
| // no wp set on current tensor | |||
| if (watchpoints_to_check.empty()) continue; | |||
| uint32_t num_elements = tensor_ptr->DataSize(); | |||
| void *previous_tensor_ptr = tensor_loader_->GetPrevTensor(tensor_name) | |||
| ? tensor_loader_->GetPrevTensor(tensor_name)->GetTensor()->data_c() | |||
| : nullptr; | |||
| uint32_t num_elements = tensor->GetNumElements(); | |||
| #ifdef OFFLINE_DBG_MODE | |||
| void *previous_tensor_ptr = GetPrevTensor(tensor, previous_iter_tensor_needed); | |||
| #else | |||
| void *previous_tensor_ptr = | |||
| tensor_loader_->GetPrevTensor(tensor_name) ? tensor_loader_->GetPrevTensor(tensor_name)->GetDataPtr() : nullptr; | |||
| #endif | |||
| std::unique_ptr<ITensorSummary> base_summary_ptr; | |||
| if (!(watchpoints_to_check.size() == 1 && watchpoints_to_check[0].condition.type == IS_OVERFLOW)) { | |||
| switch (tensor_dtype) { | |||
| case kNumberTypeUInt8: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<uint8_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeInt8: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<int8_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeUInt16: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<uint16_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeInt16: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<int16_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeUInt32: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<uint32_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeInt32: | |||
| case kNumberTypeInt: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<int32_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeUInt64: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<uint64_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeInt64: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<int64_t>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeFloat16: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<float16>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeFloat32: | |||
| case kNumberTypeFloat: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<float>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeFloat64: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<double>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| case kNumberTypeBool: { | |||
| base_summary_ptr = | |||
| std::make_unique<TensorSummary<bool>>(tensor_ptr->data_c(), previous_tensor_ptr, num_elements); | |||
| break; | |||
| } | |||
| default: | |||
| MS_LOG(INFO) << "Unsupported tensor type"; | |||
| continue; | |||
| base_summary_ptr = GetSummaryPtr(tensor, previous_tensor_ptr, num_elements, tensor_dtype); | |||
| if (base_summary_ptr != nullptr) { | |||
| base_summary_ptr->SummarizeTensor(watchpoints_to_check); | |||
| } | |||
| base_summary_ptr->SummarizeTensor(watchpoints_to_check); | |||
| } | |||
| for (auto &wp : watchpoints_to_check) { | |||
| bool is_hit = false; | |||
| int error_code = 0; | |||
| @@ -189,26 +250,439 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector | |||
| error_code = std::get<1>(item); | |||
| parameter_list = std::get<2>(item); | |||
| } | |||
| // add analyzed tensor to cache | |||
| if (!recheck) { | |||
| wp_id_cache[tensor_name].insert(wp.id); | |||
| } | |||
| AddAnalyzedTensorToCache(recheck, wp.id, tensor_name); | |||
| if (is_hit || error_code) { | |||
| name->push_back(qualified_tensor_name); | |||
| slot->push_back(tensor_slot); | |||
| condition->push_back(wp.condition.type); | |||
| watchpoint_id->push_back(wp.id); | |||
| if (device_id != nullptr) { | |||
| device_id->push_back(tensor->GetDeviceId()); | |||
| } | |||
| if (root_graph_id != nullptr) { | |||
| root_graph_id->push_back(tensor->GetRootGraphId()); | |||
| } | |||
| parameters->push_back(parameter_list); | |||
| error_codes->push_back(error_code); | |||
| } | |||
| } | |||
| #ifdef OFFLINE_DBG_MODE | |||
| // in offline mode remove the need for the data | |||
| tensor.reset(); | |||
| #endif | |||
| } | |||
| } | |||
| #ifdef OFFLINE_DBG_MODE | |||
| void DebugServices::GetSlotInfo(const std::string &file_name, const std::string &dump_name, | |||
| const std::string &specific_dump_dir, std::vector<size_t> *slot_list) { | |||
| if (is_sync_mode) { | |||
| // get the slot from the name | |||
| std::string delimiter = "_"; | |||
| unsigned int start_pos = dump_name.length(); | |||
| unsigned int end_pos = file_name.find(delimiter, start_pos); | |||
| std::string item = file_name.substr(start_pos, end_pos - start_pos); | |||
| slot_list->push_back(std::stoul(item)); | |||
| } else { | |||
| std::string out_dir = "/tmp/" + file_name; | |||
| std::string input_file = specific_dump_dir + "/" + file_name; | |||
| std::string log_enabled = DbgLogger::verbose ? "" : "> /dev/null"; | |||
| std::string convert_command = | |||
| "python /usr/local/Ascend/toolkit/tools/operator_cmp/compare/msaccucmp.pyc convert -d " + input_file + " -out " + | |||
| out_dir + " -t bin " + log_enabled; | |||
| (void)(system(convert_command.c_str()) + 1); | |||
| convert_command = "python /usr/local/Ascend/toolkit/tools/operator_cmp/compare/msaccucmp.pyc convert -d " + | |||
| input_file + " -out " + out_dir + " -f NCHW -t bin " + log_enabled; | |||
| (void)(system(convert_command.c_str()) + 1); | |||
| std::string prefix_converted_dump_file_name = file_name + ".output."; | |||
| DIR *convert_dir_ptr = opendir(out_dir.c_str()); | |||
| if (convert_dir_ptr != nullptr) { | |||
| struct dirent *convert_dir_contents = nullptr; | |||
| while ((convert_dir_contents = readdir(convert_dir_ptr)) != NULL) { | |||
| if (convert_dir_contents->d_type == DT_REG) { | |||
| std::string converted_file_name = convert_dir_contents->d_name; | |||
| std::size_t nd_file = converted_file_name.rfind(".ND.bin"); | |||
| std::size_t fractal_z_file = converted_file_name.rfind(".FRACTAL_Z.bin"); | |||
| std::size_t nchw_file = converted_file_name.rfind(".NCHW.bin"); | |||
| if (nd_file == std::string::npos && nchw_file == std::string::npos && fractal_z_file == std::string::npos) { | |||
| continue; | |||
| } | |||
| std::size_t found_c = converted_file_name.find(prefix_converted_dump_file_name); | |||
| if (found_c != 0) { | |||
| continue; | |||
| } | |||
| std::size_t slot_start_pos = prefix_converted_dump_file_name.length(); | |||
| std::size_t slot_end_pos = converted_file_name.find(".", slot_start_pos) - 1; | |||
| std::string slot_item = converted_file_name.substr(slot_start_pos, slot_end_pos - slot_start_pos + 1); | |||
| slot_list->push_back(std::stoul(slot_item)); | |||
| } | |||
| } | |||
| } else { | |||
| MS_LOG(INFO) << out_dir << " directory does not exist!"; | |||
| } | |||
| closedir(convert_dir_ptr); | |||
| // std::string delete_cmd = "rm -rf " + out_dir; | |||
| // system(delete_cmd.c_str()); | |||
| } | |||
| } | |||
| std::size_t DebugServices::GetShapeTypeInfo(const std::string &specific_dump_dir, std::size_t slot, | |||
| const std::string &prefix_dump_file_name, std::string *file_name, | |||
| std::string *type_name, std::string *out_dir, std::vector<int64_t> *shape) { | |||
| std::size_t found = 0; | |||
| if (is_sync_mode) { | |||
| found = file_name->rfind(prefix_dump_file_name, 0); | |||
| } else { | |||
| std::string file_name_w_o_prefix = file_name->substr(file_name->find('.') + 1); | |||
| found = file_name_w_o_prefix.rfind(prefix_dump_file_name, 0); | |||
| } | |||
| if (found != 0) { | |||
| return found; | |||
| } | |||
| if (is_sync_mode) { | |||
| // found a file, now get the shape and type | |||
| // find "_shape_" in the filename | |||
| std::string shape_delimiter = "_shape_"; | |||
| unsigned int str_pos = file_name->find(shape_delimiter) + shape_delimiter.length(); | |||
| // read numbers with '_' delimter until you read a non-number, that will be the type name | |||
| bool number_found = true; | |||
| std::string delimiter = "_"; | |||
| while (number_found) { | |||
| unsigned int end_pos = file_name->find(delimiter, str_pos); | |||
| std::string item = file_name->substr(str_pos, end_pos - str_pos); | |||
| bool is_number = !item.empty() && std::find_if(item.begin(), item.end(), | |||
| [](unsigned char c) { return !std::isdigit(c); }) == item.end(); | |||
| if (is_number) { | |||
| shape->push_back(std::stoul(item)); | |||
| str_pos = end_pos + 1; | |||
| } else { | |||
| *type_name = item; | |||
| number_found = false; | |||
| } | |||
| } | |||
| } else { | |||
| *out_dir = "/tmp/" + *file_name; | |||
| std::string input_file = specific_dump_dir + "/" + *file_name; | |||
| std::string log_enabled = DbgLogger::verbose ? "" : "> /dev/null"; | |||
| std::string convert_command = | |||
| "python /usr/local/Ascend/toolkit/tools/operator_cmp/compare/msaccucmp.pyc convert -d " + input_file + " -out " + | |||
| *out_dir + " -t bin " + log_enabled; | |||
| (void)(system(convert_command.c_str()) + 1); | |||
| convert_command = "python /usr/local/Ascend/toolkit/tools/operator_cmp/compare/msaccucmp.pyc convert -d " + | |||
| input_file + " -out " + *out_dir + " -f NCHW -t bin " + log_enabled; | |||
| (void)(system(convert_command.c_str()) + 1); | |||
| std::string prefix_converted_dump_file_name = *file_name + ".output." + std::to_string(slot); | |||
| *file_name = ""; | |||
| DIR *convert_dir_ptr = opendir(out_dir->c_str()); | |||
| if (convert_dir_ptr != nullptr) { | |||
| struct dirent *convert_dir_contents = nullptr; | |||
| while ((convert_dir_contents = readdir(convert_dir_ptr)) != NULL) { | |||
| if (convert_dir_contents->d_type == DT_REG) { | |||
| std::string converted_file_name = convert_dir_contents->d_name; | |||
| std::size_t nd_file = converted_file_name.rfind(".ND.bin"); | |||
| std::size_t fractal_z_file = converted_file_name.rfind(".FRACTAL_Z.bin"); | |||
| std::size_t nchw_file = converted_file_name.rfind(".NCHW.bin"); | |||
| if (nd_file == std::string::npos && nchw_file == std::string::npos && fractal_z_file == std::string::npos) { | |||
| continue; | |||
| } | |||
| std::size_t found_c = converted_file_name.rfind(prefix_converted_dump_file_name, 0); | |||
| if (found_c != 0) { | |||
| continue; | |||
| } | |||
| *file_name = converted_file_name; | |||
| } | |||
| } | |||
| } else { | |||
| MS_LOG(INFO) << *out_dir << " directory does not exist!"; | |||
| } | |||
| closedir(convert_dir_ptr); | |||
| if (*file_name == "") { | |||
| MS_LOG(WARNING) << out_dir << ": no valid files found post msaccucmp exec"; | |||
| return 1; | |||
| } | |||
| // std::string delete_cmd = "rm -rf " + out_dir; | |||
| // system(delete_cmd.c_str()); | |||
| // found a file, now get the shape and type | |||
| std::stringstream check_filename(*file_name); | |||
| std::vector<std::string> tokens; | |||
| std::string intermediate; | |||
| while (getline(check_filename, intermediate, '.')) { | |||
| tokens.push_back(intermediate); | |||
| } | |||
| *type_name = tokens[8]; | |||
| std::string shape_str = tokens[7]; | |||
| std::stringstream check_shape(shape_str); | |||
| while (getline(check_shape, intermediate, '_')) { | |||
| shape->push_back(std::stoul(intermediate)); | |||
| } | |||
| } | |||
| return 0; | |||
| } | |||
| void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std::vector<size_t> slot, | |||
| std::vector<unsigned int> device_id, std::vector<unsigned int> iteration, | |||
| std::vector<unsigned int> root_graph_id, | |||
| std::vector<std::shared_ptr<TensorData>> *result_list) { | |||
| for (unsigned int i = 0; i < backend_name.size(); i++) { | |||
| // form prefix of the tensor file to read from graph pb node name | |||
| std::string dump_style_kernel_name = backend_name[i]; | |||
| const std::string strsrc = "/"; | |||
| std::string strdst; | |||
| if (is_sync_mode) { | |||
| strdst = "--"; | |||
| } else { | |||
| strdst = "_"; | |||
| } | |||
| std::string::size_type pos = 0; | |||
| std::string::size_type srclen = strsrc.size(); | |||
| std::string::size_type dstlen = strdst.size(); | |||
| // remove slot from name | |||
| std::size_t found_colon = dump_style_kernel_name.find_last_of(":"); | |||
| dump_style_kernel_name = dump_style_kernel_name.substr(0, found_colon); | |||
| while ((pos = dump_style_kernel_name.find(strsrc, pos)) != std::string::npos) { | |||
| dump_style_kernel_name.replace(pos, srclen, strdst); | |||
| pos += dstlen; | |||
| } | |||
| std::string prefix_dump_file_name = dump_style_kernel_name; | |||
| if (is_sync_mode) { | |||
| prefix_dump_file_name += "_output_" + std::to_string(slot[i]) + "_"; | |||
| } | |||
| std::string specific_dump_dir; | |||
| if (is_sync_mode) { | |||
| specific_dump_dir = | |||
| dump_dir + "/device_" + std::to_string(device_id[i]) + "/iteration_" + std::to_string(iteration[i]); | |||
| } else { | |||
| specific_dump_dir = dump_dir + "/device_" + std::to_string(device_id[i]) + "/" + net_name + "_graph_" + | |||
| std::to_string(root_graph_id[i]) + "/" + std::to_string(root_graph_id[i]) + "/" + | |||
| std::to_string(iteration[i]); | |||
| } | |||
| // search files in dir for the one that meets the filename prefix and read the file into memory | |||
| DIR *d; | |||
| d = opendir(specific_dump_dir.c_str()); | |||
| std::vector<char> *buffer = NULL; | |||
| std::string type_name = ""; | |||
| std::vector<int64_t> shape; | |||
| uint64_t data_size = 0; | |||
| if (d != nullptr) { | |||
| struct dirent *dir = nullptr; | |||
| while ((dir = readdir(d)) != NULL) { | |||
| if (dir->d_type == DT_REG) { | |||
| std::string file_name = dir->d_name; | |||
| std::string out_dir; | |||
| std::size_t found = GetShapeTypeInfo(specific_dump_dir, slot[i], prefix_dump_file_name, &file_name, | |||
| &type_name, &out_dir, &shape); | |||
| if (found != 0) { | |||
| continue; | |||
| } | |||
| // read the tensor data from the file | |||
| std::string file_path; | |||
| if (is_sync_mode) { | |||
| file_path = specific_dump_dir + "/" + file_name; | |||
| } else { | |||
| file_path = out_dir + "/" + file_name; | |||
| } | |||
| std::ifstream infile; | |||
| infile.open(file_path.c_str(), std::ios::binary | std::ios::ate); | |||
| if (!infile.is_open()) { | |||
| MS_LOG(ERROR) << "Failed to open bin file " << file_name; | |||
| break; | |||
| } | |||
| uint64_t file_size = infile.tellg(); | |||
| infile.seekg(0, std::ios::beg); | |||
| buffer = new std::vector<char>(file_size); | |||
| if (!infile.read(buffer->data(), file_size)) { | |||
| MS_LOG(ERROR) << "Failed to read in bin file " << file_name; | |||
| break; | |||
| } | |||
| data_size = file_size; | |||
| infile.close(); | |||
| } | |||
| } | |||
| } else { | |||
| MS_LOG(INFO) << "directory does not exist!"; | |||
| } | |||
| closedir(d); | |||
| // call LoadNewTensor to store tensor in internal cache | |||
| auto tensor_data = std::make_shared<TensorData>(); | |||
| tensor_data->SetName(backend_name[i]); | |||
| tensor_data->SetExecutionOrder(0); | |||
| tensor_data->SetSlot(slot[i]); | |||
| tensor_data->SetIteration(iteration[i]); | |||
| tensor_data->SetDeviceId(device_id[i]); | |||
| tensor_data->SetRootGraphId(root_graph_id[i]); | |||
| if (data_size) { | |||
| tensor_data->SetDataPtr(buffer->data()); | |||
| } else { | |||
| tensor_data->SetDataPtr(NULL); | |||
| } | |||
| tensor_data->SetByteSize(data_size); | |||
| tensor_data->SetType(type_name); | |||
| tensor_data->SetShape(shape); | |||
| if (data_size) { | |||
| tensor_loader_->LoadNewTensor(tensor_data, false); | |||
| } | |||
| // add to result_list | |||
| result_list->push_back(tensor_data); | |||
| } | |||
| } | |||
| void ReplaceSrcFileName(const bool is_sync_mode, std::string *dump_style_name) { | |||
| const std::string strsrc = "/"; | |||
| std::string strdst; | |||
| if (is_sync_mode) { | |||
| strdst = "--"; | |||
| } else { | |||
| strdst = "_"; | |||
| } | |||
| std::string::size_type pos = 0; | |||
| std::string::size_type srclen = strsrc.size(); | |||
| std::string::size_type dstlen = strdst.size(); | |||
| while ((pos = dump_style_name->find(strsrc, pos)) != std::string::npos) { | |||
| dump_style_name->replace(pos, srclen, strdst); | |||
| pos += dstlen; | |||
| } | |||
| } | |||
| std::vector<std::shared_ptr<TensorData>> DebugServices::ReadNeededDumpedTensors(unsigned int iteration) { | |||
| // get a list of nodes and the devices they are on to monitor | |||
| std::vector<std::shared_ptr<TensorData>> tensor_list; | |||
| std::map<std::tuple<uint32_t, uint32_t>, std::unordered_set<std::string>> device_and_graph_to_nodes; | |||
| for (auto w_table_item : watchpoint_table) { | |||
| auto wp = std::get<1>(w_table_item); | |||
| for (auto check_node : wp.check_node_list) { | |||
| unsigned int index = 0; | |||
| std::string w_name = std::get<0>(check_node); | |||
| bool w_is_param = std::get<1>(check_node); | |||
| std::string node_name = w_name; | |||
| if (w_is_param) { | |||
| std::size_t found = node_name.find_last_of("/"); | |||
| node_name = node_name.substr(found + 1); | |||
| } | |||
| std::vector<uint32_t> devices = std::get<1>(wp.check_node_device_list[index]); | |||
| std::vector<uint32_t> graphs = std::get<1>(wp.check_node_graph_list[index]); | |||
| for (auto device : devices) { | |||
| for (auto graph : graphs) { | |||
| std::tuple<uint32_t, uint32_t> key(device, graph); | |||
| device_and_graph_to_nodes[key].insert(node_name); | |||
| } | |||
| } | |||
| index++; | |||
| } | |||
| } | |||
| // scan each device/iteration dir for the watched nodes for each device, and add to tensor_list | |||
| // as they are found | |||
| for (auto const &device_and_graph_item : device_and_graph_to_nodes) { | |||
| std::tuple<uint32_t, uint32_t> device_and_graph = device_and_graph_item.first; | |||
| uint32_t device_id = std::get<0>(device_and_graph); | |||
| uint32_t root_graph_id = std::get<1>(device_and_graph); | |||
| std::unordered_set<std::string> wp_nodes = device_and_graph_item.second; | |||
| std::vector<std::tuple<std::string, std::string>> proto_to_dump; | |||
| std::string specific_dump_dir; | |||
| if (is_sync_mode) { | |||
| specific_dump_dir = dump_dir + "/device_" + std::to_string(device_id) + "/iteration_" + std::to_string(iteration); | |||
| } else { | |||
| specific_dump_dir = dump_dir + "/device_" + std::to_string(device_id) + "/" + net_name + "_graph_" + | |||
| std::to_string(root_graph_id) + "/" + std::to_string(root_graph_id) + "/" + | |||
| std::to_string(iteration); | |||
| } | |||
| // convert node names to dump style | |||
| for (auto node : wp_nodes) { | |||
| std::string orig_name = node; | |||
| std::string dump_style_name = node; | |||
| ReplaceSrcFileName(is_sync_mode, &dump_style_name); | |||
| if (is_sync_mode) { | |||
| dump_style_name.append("_output_"); | |||
| } | |||
| proto_to_dump.push_back(std::tuple<std::string, std::string>(orig_name, dump_style_name)); | |||
| } | |||
| // search files in dir for the one that meets the filename prefix and read the file into memory | |||
| DIR *d; | |||
| d = opendir(specific_dump_dir.c_str()); | |||
| if (d != nullptr) { | |||
| struct dirent *dir = nullptr; | |||
| while ((dir = readdir(d)) != NULL) { | |||
| if (dir->d_type == DT_REG) { | |||
| std::string file_name = dir->d_name; | |||
| for (auto &node : proto_to_dump) { | |||
| std::string dump_name = std::get<1>(node); | |||
| std::size_t found = 0; | |||
| if (is_sync_mode) { | |||
| found = file_name.rfind(dump_name, 0); | |||
| } else { | |||
| std::string file_name_w_o_prefix = file_name.substr(file_name.find('.') + 1); | |||
| found = file_name_w_o_prefix.rfind(dump_name, 0); | |||
| } | |||
| if (found == 0) { | |||
| std::vector<size_t> slot_list; | |||
| GetSlotInfo(file_name, dump_name, specific_dump_dir, &slot_list); | |||
| for (auto slot : slot_list) { | |||
| // add a TensorData entry (data will be read when needed) | |||
| std::vector<int64_t> shape; | |||
| std::string orig_name = std::get<0>(node); | |||
| auto tensor_data = std::make_shared<TensorData>(); | |||
| tensor_data->SetName(orig_name); | |||
| tensor_data->SetExecutionOrder(0); | |||
| tensor_data->SetSlot(slot); | |||
| tensor_data->SetIteration(iteration); | |||
| tensor_data->SetDeviceId(device_id); | |||
| tensor_data->SetRootGraphId(root_graph_id); | |||
| tensor_data->SetDataPtr(NULL); | |||
| tensor_data->SetByteSize(0); | |||
| tensor_data->SetType(""); | |||
| tensor_data->SetShape(shape); | |||
| tensor_list.push_back(tensor_data); | |||
| } | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| return tensor_list; | |||
| } | |||
| #endif | |||
| void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name, | |||
| std::vector<char *> *data_ptr, std::vector<ssize_t> *data_size, | |||
| std::vector<TypePtr> *dtype, std::vector<std::vector<int64_t>> *shape) { | |||
| std::vector<unsigned int> *dtype, std::vector<std::vector<int64_t>> *shape) { | |||
| std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> result_list; | |||
| tensor_loader_->SearchTensors(name, &result_list); | |||
| @@ -217,13 +691,14 @@ void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector< | |||
| continue; | |||
| } | |||
| ret_name->push_back(std::get<0>(result)); | |||
| data_ptr->push_back(reinterpret_cast<char *>(std::get<1>(result)->GetTensor()->data_c())); | |||
| data_size->push_back(std::get<1>(result)->GetTensor()->data().nbytes()); | |||
| dtype->push_back(std::get<1>(result)->GetTensor()->Dtype()); | |||
| shape->push_back(std::get<1>(result)->GetTensor()->shape()); | |||
| data_ptr->push_back(reinterpret_cast<char *>(std::get<1>(result)->GetDataPtr())); | |||
| data_size->push_back(std::get<1>(result)->GetByteSize()); | |||
| dtype->push_back(std::get<1>(result)->GetType()); | |||
| shape->push_back(std::get<1>(result)->GetShape()); | |||
| } | |||
| } | |||
| #ifdef ONLINE_DBG_MODE | |||
| bool DebugServices::IsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel) const { | |||
| bool ret = false; | |||
| for (auto w_table_item : watchpoint_table) { | |||
| @@ -256,6 +731,7 @@ bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNode | |||
| return false; | |||
| } | |||
| } | |||
| #endif | |||
| void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); } | |||
| @@ -273,6 +749,7 @@ void DebugServices::EmptyPrevTensor() { tensor_loader_->EmptyPrevTensor(); } | |||
| void DebugServices::EmptyCurrentTensor() { tensor_loader_->EmptyCurrentTensor(); } | |||
| #ifdef ONLINE_DBG_MODE | |||
| bool DebugServices::DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath, | |||
| const std::string &host_fmt, const std::vector<int64_t> &host_shape, | |||
| TypeId host_type, TypeId addr_type_id, const std::string &addr_format, | |||
| @@ -280,6 +757,7 @@ bool DebugServices::DumpTensorToFile(const std::string &tensor_name, bool trans_ | |||
| return tensor_loader_->DumpTensorToFile(tensor_name, trans_flag, filepath, host_fmt, host_shape, host_type, | |||
| addr_type_id, addr_format, slot); | |||
| } | |||
| #endif | |||
| bool DebugServices::LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev) { | |||
| return tensor_loader_->LoadNewTensor(tensor, keep_prev); | |||
| @@ -298,6 +776,7 @@ void DebugServices::ResetLoadedTensors() { | |||
| tensor_loader_->SwapCurrentPrev(); | |||
| } | |||
| #ifdef ONLINE_DBG_MODE | |||
| std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNodePtr &kernel) { | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| std::vector<std::shared_ptr<TensorData>> result; | |||
| @@ -310,6 +789,8 @@ std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNod | |||
| } | |||
| return result; | |||
| } | |||
| #endif | |||
| bool DebugServices::TensorExistsInCurrent(std::string tensor_name) { | |||
| return tensor_loader_->TensorExistsInCurrent(tensor_name); | |||
| } | |||
| @@ -317,4 +798,18 @@ void DebugServices::MoveTensorCurrentToPrev(std::string tensor_name) { | |||
| tensor_loader_->MoveTensorCurrentToPrev(tensor_name); | |||
| } | |||
| void DebugServices::SetNetName(std::string net_name) { this->net_name = net_name; } | |||
| std::string DebugServices::GetNetName() { return net_name; } | |||
| void DebugServices::SetDumpDir(std::string dump_dir) { this->dump_dir = dump_dir; } | |||
| std::string DebugServices::GetDumpDir() { return dump_dir; } | |||
| void DebugServices::SetSyncMode(bool is_sync_mode) { this->is_sync_mode = is_sync_mode; } | |||
| bool DebugServices::GetSyncMode() { return is_sync_mode; } | |||
| #ifdef ONLINE_DBG_MODE | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -16,6 +16,17 @@ | |||
| #ifndef MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_ | |||
| #define MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_ | |||
| #ifndef OFFLINE_DBG_MODE | |||
| #define ONLINE_DBG_MODE | |||
| #endif | |||
| #ifdef OFFLINE_DBG_MODE | |||
| #include "Eigen/Core" | |||
| #include "Eigen/src/Core/arch/CUDA/Half.h" | |||
| using float16 = Eigen::half; | |||
| #include "debugger/offline_debug/offline_logger.h" | |||
| #endif | |||
| #include <math.h> | |||
| #include <vector> | |||
| #include <string> | |||
| @@ -26,11 +37,13 @@ | |||
| #include <mutex> | |||
| #include <map> | |||
| #include <limits> | |||
| #include <sstream> | |||
| #include "debug/tensor_load.h" | |||
| #include "debug/tensor_data.h" | |||
| #include "ir/dtype.h" | |||
| #ifdef ONLINE_DBG_MODE | |||
| namespace mindspore { | |||
| #endif | |||
| class DebugServices { | |||
| public: | |||
| DebugServices(); | |||
| @@ -103,6 +116,8 @@ class DebugServices { | |||
| unsigned int id; | |||
| condition_t condition; | |||
| std::vector<std::tuple<std::string, bool>> check_node_list; | |||
| std::vector<std::tuple<std::string, std::vector<uint32_t>>> check_node_device_list; | |||
| std::vector<std::tuple<std::string, std::vector<uint32_t>>> check_node_graph_list; | |||
| std::vector<parameter_t> parameter_list; | |||
| size_t location = 0; | |||
| @@ -167,30 +182,55 @@ class DebugServices { | |||
| } | |||
| } watchpoint_t; | |||
| void AddWatchpoint(unsigned int id, unsigned int watch_condition, float parameter, | |||
| const std::vector<std::tuple<std::string, bool>> &check_node_list, | |||
| const std::vector<parameter_t> ¶meter_list); | |||
| void AddWatchpoint( | |||
| unsigned int id, unsigned int watch_condition, float parameter, | |||
| const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> ¶meter_list, | |||
| const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_device_list = nullptr, | |||
| const std::vector<std::tuple<std::string, std::vector<uint32_t>>> *check_node_graph_list = nullptr); | |||
| void RemoveWatchpoint(unsigned int id); | |||
| void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition, | |||
| std::vector<unsigned int> *watchpoint_id, std::vector<std::vector<parameter_t>> *parameters, | |||
| std::vector<int32_t> *error_code, const std::vector<std::string> &op_overflows, | |||
| const std::vector<std::shared_ptr<TensorData>> &tensor_list, bool init_dbg_suspend, | |||
| const bool step_end, const bool recheck); | |||
| std::vector<std::shared_ptr<TensorData>> *tensor_list, bool init_dbg_suspend, | |||
| const bool step_end, const bool recheck, std::vector<unsigned int> *device_id = nullptr, | |||
| std::vector<unsigned int> *root_graph_id = nullptr); | |||
| void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name, | |||
| std::vector<char *> *data_ptr, std::vector<ssize_t> *data_size, std::vector<TypePtr> *dtype, | |||
| std::vector<std::vector<int64_t>> *shape); | |||
| void AddWatchPointsToCheck(bool init_dbg_suspend, bool step_end, bool recheck, const std::string &tensor_name, | |||
| const std::string &tensor_name_no_slot, bool *previous_iter_tensor_needed, | |||
| std::string *qualified_tensor_name, std::vector<watchpoint_t> *watchpoints_to_check); | |||
| #ifdef OFFLINE_DBG_MODE | |||
| void GetSlotInfo(const std::string &file_name, const std::string &dump_name, const std::string &specific_dump_dir, | |||
| std::vector<size_t> *slot_list); | |||
| std::size_t GetShapeTypeInfo(const std::string &specific_dump_dir, std::size_t slot, | |||
| const std::string &prefix_dump_file_name, std::string *file_name, std::string *type_name, | |||
| std::string *out_dir, std::vector<int64_t> *shape); | |||
| void ReadDumpedTensor(std::vector<std::string> backend_name, std::vector<size_t> slot, | |||
| std::vector<unsigned int> device_id, std::vector<unsigned int> iteration, | |||
| std::vector<unsigned int> root_graph_id, std::vector<std::shared_ptr<TensorData>> *result_list); | |||
| std::vector<std::shared_ptr<TensorData>> ReadNeededDumpedTensors(unsigned int iteration); | |||
| void *GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed); | |||
| #endif | |||
| void ReadNodesTensors(std::vector<std::string> name, std::vector<std::string> *ret_name, | |||
| std::vector<char *> *data_ptr, std::vector<ssize_t> *data_size, | |||
| std::vector<unsigned int> *dtype, std::vector<std::vector<int64_t>> *shape); | |||
| #ifdef ONLINE_DBG_MODE | |||
| bool IsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel = nullptr) const; | |||
| bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const; | |||
| #endif | |||
| void EmptyTensor(); | |||
| std::vector<std::shared_ptr<TensorData>> GetTensor() const; | |||
| void AddAnalyzedTensorToCache(const bool recheck, const unsigned int id, const std::string &tensor_name); | |||
| std::vector<std::shared_ptr<TensorData>> GetNodeTensorMap(const std::string &node_name) const; | |||
| uint32_t GetTensorLoaderIterNum() const; | |||
| @@ -201,31 +241,51 @@ class DebugServices { | |||
| void EmptyCurrentTensor(); | |||
| #ifdef ONLINE_DBG_MODE | |||
| bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath, | |||
| const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type, | |||
| TypeId addr_type_id, const std::string &addr_format, size_t slot) const; | |||
| #endif | |||
| bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev); | |||
| std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable(); | |||
| void ResetLoadedTensors(); | |||
| #ifdef ONLINE_DBG_MODE | |||
| std::vector<std::shared_ptr<TensorData>> GetNodeTensor(const CNodePtr &kernel); | |||
| #endif | |||
| bool TensorExistsInCurrent(std::string tensor_name); | |||
| void MoveTensorCurrentToPrev(std::string tensor_name); | |||
| void SetNetName(std::string net_name); | |||
| std::string GetNetName(); | |||
| void SetDumpDir(std::string dump_dir); | |||
| std::string GetDumpDir(); | |||
| void SetSyncMode(bool is_sync_mode); | |||
| bool GetSyncMode(); | |||
| private: | |||
| std::mutex lock_; | |||
| // to keep track of watchpoints that have been checked already for a tensor in current step | |||
| std::unordered_map<std::string, std::set<int32_t>> wp_id_cache; | |||
| std::unordered_map<unsigned int, watchpoint_t> watchpoint_table; | |||
| std::string net_name; | |||
| std::string dump_dir; | |||
| bool is_sync_mode; | |||
| TensorLoader *tensor_loader_; | |||
| }; | |||
| #ifdef ONLINE_DBG_MODE | |||
| } // namespace mindspore | |||
| #endif | |||
| #endif // MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_ | |||
| @@ -755,7 +755,7 @@ std::list<TensorProto> Debugger::LoadTensors(const ProtoVector<TensorProto> &ten | |||
| std::vector<std::string> ret_name; | |||
| std::vector<char *> data_ptr; | |||
| std::vector<ssize_t> data_size; | |||
| std::vector<TypePtr> dtype; | |||
| std::vector<unsigned int> dtype; | |||
| std::vector<std::vector<int64_t>> shape; | |||
| std::transform(tensors.begin(), tensors.end(), std::back_inserter(name), GetTensorFullName); | |||
| @@ -789,7 +789,7 @@ std::list<TensorProto> Debugger::LoadTensors(const ProtoVector<TensorProto> &ten | |||
| tensor_item.set_tensor_content(data_ptr[result_index] + size_iter, chunk_size); | |||
| tensor_item.set_data_type(GetDebuggerNumberDataType(dtype[result_index])); | |||
| tensor_item.set_data_type((debugger::DataType)dtype[result_index]); | |||
| for (auto &elem : shape[result_index]) { | |||
| tensor_item.add_dims(elem); | |||
| } | |||
| @@ -827,7 +827,7 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode | |||
| tensor_list = debug_services_->GetNodeTensor(kernel); | |||
| } | |||
| debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, ¶meters, &error_codes, overflow_ops, | |||
| tensor_list, initial_suspend_, watchnode.empty(), recheck); | |||
| &tensor_list, initial_suspend_, watchnode.empty(), recheck); | |||
| std::list<WatchpointHit> hits; | |||
| for (unsigned int i = 0; i < name.size(); i++) { | |||
| WatchpointHit hit; | |||
| @@ -0,0 +1,28 @@ | |||
| ----------------------------------------------------------- | |||
| tensor_info_1 attributes: | |||
| node name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op169 | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 1 | |||
| is_parameter = False | |||
| tensor_data_1 attributes: | |||
| data (printed in uint8) = [149 167 124 ... 158 212 164] | |||
| size in bytes = 2076672 | |||
| debugger dtype = 10 | |||
| shape = [32, 192, 13, 13] | |||
| ----------------------------------------------------------- | |||
| tensor_info_2 attributes: | |||
| node name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/ReLUV2-op348 | |||
| slot = 1 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 1 | |||
| is_parameter = False | |||
| tensor_data_2 attributes: | |||
| data (printed in uint8) = [ 20 21 18 ... 126 98 25] | |||
| size in bytes = 129792 | |||
| debugger dtype = 6 | |||
| shape = [32, 12, 13, 13, 2] | |||
| @@ -0,0 +1,72 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Read tensor test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
| import numpy as np | |||
| def main(): | |||
| debugger_backend = d.DbgServices( | |||
| dump_file_path="/opt/nvme2n1/j00455527/dumps/async_sink_true/032421") | |||
| _ = debugger_backend.initialize(net_name="alexnet", is_sync_mode=False) | |||
| # output tensor with zero slot | |||
| info1 = d.TensorInfo(node_name="Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/" | |||
| "conv3-Conv2d/Conv2D-op169", | |||
| slot=0, iteration=2, device_id=0, root_graph_id=1, is_parameter=False) | |||
| # output tensor with non-zero slot | |||
| info2 = d.TensorInfo(node_name="Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/" | |||
| "ReLUV2-op348", | |||
| slot=1, iteration=2, device_id=0, root_graph_id=1, is_parameter=False) | |||
| tensor_info = [info1, info2] | |||
| tensor_data = debugger_backend.read_tensors(tensor_info) | |||
| print_read_tensors(tensor_info, tensor_data) | |||
| def print_read_tensors(tensor_info, tensor_data): | |||
| """Print read tensors.""" | |||
| for x, _ in enumerate(tensor_info): | |||
| print("-----------------------------------------------------------") | |||
| print("tensor_info_" + str(x+1) + " attributes:") | |||
| print("node name = ", tensor_info[x].node_name) | |||
| print("slot = ", tensor_info[x].slot) | |||
| print("iteration = ", tensor_info[x].iteration) | |||
| print("device_id = ", tensor_info[x].device_id) | |||
| print("root_graph_id = ", tensor_info[x].root_graph_id) | |||
| print("is_parameter = ", tensor_info[x].is_parameter) | |||
| print() | |||
| print("tensor_data_" + str(x+1) + " attributes:") | |||
| print("data (printed in uint8) = ", np.frombuffer( | |||
| tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) | |||
| py_byte_size = len(tensor_data[x].data_ptr) | |||
| c_byte_size = tensor_data[x].data_size | |||
| if c_byte_size != py_byte_size: | |||
| print("The python byte size of ", py_byte_size, | |||
| " does not match the C++ byte size of ", c_byte_size) | |||
| print("size in bytes = ", tensor_data[x].data_size) | |||
| print("debugger dtype = ", tensor_data[x].dtype) | |||
| print("shape = ", tensor_data[x].shape) | |||
| if __name__ == "__main__": | |||
| main() | |||
| @@ -0,0 +1,14 @@ | |||
| ----------------------------------------------------------- | |||
| watchpoint_hit for test_1 attributes: | |||
| name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op169 | |||
| slot = 0 | |||
| condition = 6 | |||
| watchpoint_id = 1 | |||
| parameter 0 name = param | |||
| parameter 0 disabled = False | |||
| parameter 0 value = 0.0 | |||
| parameter 0 hit = True | |||
| parameter 0 actual_value = -0.1417236328125 | |||
| error code = 0 | |||
| device_id = 0 | |||
| root_graph_id = 1 | |||
| @@ -0,0 +1,92 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Watchpoints test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
| def main(): | |||
| debugger_backend = d.DbgServices( | |||
| dump_file_path="/opt/nvme2n1/j00455527/dumps/async_sink_true/032421") | |||
| _ = debugger_backend.initialize(net_name="alexnet", is_sync_mode=False) | |||
| # NOTES: | |||
| # -> watch_condition=6 is MIN_LT | |||
| # -> watch_condition=18 is CHANGE_TOO_LARGE | |||
| # test 1: watchpoint set and hit (watch_condition=6) | |||
| param1 = d.Parameter(name="param", disabled=False, value=0.0) | |||
| _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6, | |||
| check_node_list={"Default/network-TrainOneStepCell/network-WithLossCell/" | |||
| "_backbone-AlexNet/conv3-Conv2d/Conv2D-op169": | |||
| {"device_id": [0], "root_graph_id": [1], "is_parameter": False | |||
| }}, parameter_list=[param1]) | |||
| watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2) | |||
| if len(watchpoint_hits_test_1) != 1: | |||
| print("ERROR -> test 1: watchpoint set but not hit just once") | |||
| print_watchpoint_hits(watchpoint_hits_test_1, 1) | |||
| # test 2: watchpoint remove and ensure it's not hit | |||
| _ = debugger_backend.remove_watchpoint(watchpoint_id=1) | |||
| watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2) | |||
| if watchpoint_hits_test_2: | |||
| print("ERROR -> test 2: watchpoint removed but hit") | |||
| # test 3: watchpoint set and not hit, then remove | |||
| param2 = d.Parameter(name="param", disabled=False, value=-1000.0) | |||
| _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6, | |||
| check_node_list={"Default/network-TrainOneStepCell/network-WithLossCell/" | |||
| "_backbone-AlexNet/conv3-Conv2d/Conv2D-op169": | |||
| {"device_id": [0], "root_graph_id": [1], "is_parameter": False | |||
| }}, parameter_list=[param2]) | |||
| watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2) | |||
| if watchpoint_hits_test_3: | |||
| print("ERROR -> test 3: watchpoint set but not supposed to be hit") | |||
| _ = debugger_backend.remove_watchpoint(watchpoint_id=2) | |||
| def print_watchpoint_hits(watchpoint_hits, test_id): | |||
| """Print watchpoint hits.""" | |||
| for x, _ in enumerate(watchpoint_hits): | |||
| print("-----------------------------------------------------------") | |||
| print("watchpoint_hit for test_%u attributes:" % test_id) | |||
| print("name = ", watchpoint_hits[x].name) | |||
| print("slot = ", watchpoint_hits[x].slot) | |||
| print("condition = ", watchpoint_hits[x].condition) | |||
| print("watchpoint_id = ", watchpoint_hits[x].watchpoint_id) | |||
| for p, _ in enumerate(watchpoint_hits[x].parameters): | |||
| print("parameter ", p, " name = ", | |||
| watchpoint_hits[x].parameters[p].name) | |||
| print("parameter ", p, " disabled = ", | |||
| watchpoint_hits[x].parameters[p].disabled) | |||
| print("parameter ", p, " value = ", | |||
| watchpoint_hits[x].parameters[p].value) | |||
| print("parameter ", p, " hit = ", | |||
| watchpoint_hits[x].parameters[p].hit) | |||
| print("parameter ", p, " actual_value = ", | |||
| watchpoint_hits[x].parameters[p].actual_value) | |||
| print("error code = ", watchpoint_hits[x].error_code) | |||
| print("device_id = ", watchpoint_hits[x].device_id) | |||
| print("root_graph_id = ", watchpoint_hits[x].root_graph_id) | |||
| if __name__ == "__main__": | |||
| main() | |||
| @@ -0,0 +1,49 @@ | |||
| python sync_trans_false_read_tensors.py > sync_trans_false_read_tensors.actual | |||
| sed -i '/\[WARNING\]/d' sync_trans_false_read_tensors.actual | |||
| sed -i '/Deprecated/d' sync_trans_false_read_tensors.actual | |||
| diff sync_trans_false_read_tensors.actual sync_trans_false_read_tensors.expected | |||
| if [ $? -eq 0 ]; then | |||
| echo sync_trans_false_read_tensors PASSED | |||
| else | |||
| echo sync_trans_false_read_tensors FAILED | |||
| fi | |||
| python sync_trans_true_read_tensors.py > sync_trans_true_read_tensors.actual | |||
| sed -i '/\[WARNING\]/d' sync_trans_true_read_tensors.actual | |||
| sed -i '/Deprecated/d' sync_trans_true_read_tensors.actual | |||
| diff sync_trans_true_read_tensors.actual sync_trans_true_read_tensors.expected | |||
| if [ $? -eq 0 ]; then | |||
| echo sync_trans_true_read_tensors PASSED | |||
| else | |||
| echo sync_trans_true_read_tensors FAILED | |||
| fi | |||
| python sync_trans_false_watchpoints.py > sync_trans_false_watchpoints.actual | |||
| sed -i '/\[WARNING\]/d' sync_trans_false_watchpoints.actual | |||
| sed -i '/Deprecated/d' sync_trans_false_watchpoints.actual | |||
| diff sync_trans_false_watchpoints.actual sync_trans_false_watchpoints.expected | |||
| if [ $? -eq 0 ]; then | |||
| echo sync_trans_false_watchpoints PASSED | |||
| else | |||
| echo sync_trans_false_watchpoints FAILED | |||
| fi | |||
| python async_sink_mode_true_read_tensors.py > async_sink_mode_true_read_tensors.actual | |||
| sed -i '/\[WARNING\]/d' async_sink_mode_true_read_tensors.actual | |||
| sed -i '/Deprecated/d' async_sink_mode_true_read_tensors.actual | |||
| diff async_sink_mode_true_read_tensors.actual async_sink_mode_true_read_tensors.expected | |||
| if [ $? -eq 0 ]; then | |||
| echo async_sink_mode_true_read_tensors PASSED | |||
| else | |||
| echo async_sink_mode_true_read_tensors FAILED | |||
| fi | |||
| python async_sink_mode_true_watchpoints.py > async_sink_mode_true_watchpoints.actual | |||
| sed -i '/\[WARNING\]/d' async_sink_mode_true_watchpoints.actual | |||
| sed -i '/Deprecated/d' async_sink_mode_true_watchpoints.actual | |||
| diff async_sink_mode_true_watchpoints.actual async_sink_mode_true_watchpoints.expected | |||
| if [ $? -eq 0 ]; then | |||
| echo async_sink_mode_true_watchpoints PASSED | |||
| else | |||
| echo async_sink_mode_true_watchpoints FAILED | |||
| fi | |||
| @@ -0,0 +1,70 @@ | |||
| ----------------------------------------------------------- | |||
| tensor_info_1 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = True | |||
| tensor_data_1 attributes: | |||
| data (printed in uint8) = [170 19 44 181 254 212 16 52 52 162 148 180 130 115 226 180 183 243 | |||
| 101 52 224 79 189 51 10 70 69 51 199 75 159 52 79 98 104 52 | |||
| 106 77 19 52 129 183 8 180 252 58 48 180 35 219 9 52 240 201 | |||
| 179 51 142 151 158 51 210 145 182 53 140 219 0 53 140 219 22 181 | |||
| 46 33 87 180 238 90 122 180 166 10 38 179 202 195 4 53 166 10 | |||
| 150 51 214 120 209 52 235 115 37 180 92 177 215 180 0 136 84 51 | |||
| 72 114 145 180 43 169 255 180 114 27 61 52 76 225 122 50 126 72 | |||
| 159 51 58 35 202 51 114 61 106 51 60 223 63 52 209 179 1 52 | |||
| 232 217 44 178 130 158 109 179 213 231 10 179 37 40 94 179 208 68 | |||
| 64 53 6 52 249 52 162 35 1 181 231 29 155 52 30 201 69 180 | |||
| 229 131 126 51 18 165 109 180 164 112 163 181 116 172 11 178 6 129 | |||
| 37 52 54 205 203 180 115 104 145 52 232 106 219 179 36 40 214 52 | |||
| 202 50 204 52 76 89 38 179 230 140 232 178 168 53 77 52 180 191 | |||
| 108 51 128 183 64 51 56 137 161 180 247 6 143 180 126 63 197 180 | |||
| 198 177 94 52 140 185 139 51 150 178 228 180 255 67 150 52 134 201 | |||
| 164 52 107 43 14 53 174 216 63 179 40 160 41 53 120 88 72 179 | |||
| 218 172 234 52 234 38 25 52 85 159 155 180 254 67 138 180 34 253 | |||
| 118 180 218 61 17 52 242 133 253 52 175 37 180 52 171 62 163 52 | |||
| 202 195 86 53 160 171 45 52 34 31 176 180 156 85 5 53 178 191 | |||
| 68 180 42 203 140 52 248 117 72 52 248 253 212 176 195 100 202 51 | |||
| 87 14 141 52 91 100 235 51 48 221 136 52 143 117 17 180 51 196 | |||
| 25 52 127 29 112 180 152 144 207 178 219 104 64 52 21 174 251 52 | |||
| 164 78 138 181 20 63 6 52 10 249 96 179 163 146 18 53 200 186 | |||
| 236 52 2 188 85 52 124 140 121 179 246 185 22 181 246 74 249 51 | |||
| 70 182 135 53 189 227 76 52 249 160 159 180 134 235 65 53 64 164 | |||
| 255 51 224 156 41 53 142 117 69 181 247 151 101 53 185 175 35 52 | |||
| 164 112 21 53 30 31 212 179 142 151 110 179 176 148 29 181 206 204 | |||
| 88 53 116 215 214 180 172 173 216 51 106 222 153 180 200 152 19 181 | |||
| 176 3 7 52 215 52 87 52] | |||
| size in bytes = 512 | |||
| debugger dtype = 11 | |||
| shape = [128] | |||
| ----------------------------------------------------------- | |||
| tensor_info_2 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168 | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_2 attributes: | |||
| data (printed in uint8) = [181 167 46 ... 12 204 164] | |||
| size in bytes = 2076672 | |||
| debugger dtype = 10 | |||
| shape = [32, 12, 13, 13, 16] | |||
| ----------------------------------------------------------- | |||
| tensor_info_3 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346 | |||
| slot = 1 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_3 attributes: | |||
| data (printed in uint8) = [ 50 17 122 ... 94 42 90] | |||
| size in bytes = 129792 | |||
| debugger dtype = 6 | |||
| shape = [32, 12, 13, 13, 2] | |||
| @@ -0,0 +1,74 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Read tensor test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
| import numpy as np | |||
def main():
    """Read three tensors from an asynchronous-dump AlexNet run and print them.

    Exercises DbgServices.read_tensors() on a parameter, a zero-slot output
    tensor and a non-zero-slot output tensor.
    NOTE(review): the dump path is machine-specific; this script only runs
    where that dump exists.
    """
    debugger_backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/sync_trans_false/032421/alexnet")
    _ = debugger_backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)
    # parameter
    info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                         slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True)
    # output tensor with zero slot
    info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168",
                         slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
    # output tensor with non-zero slot
    info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346",
                         slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
    tensor_info = [info1, info2, info3]
    tensor_data = debugger_backend.read_tensors(tensor_info)
    print_read_tensors(tensor_info, tensor_data)
def print_read_tensors(tensor_info, tensor_data):
    """Print the attributes of each requested tensor and its returned data.

    Args:
        tensor_info (list): TensorInfo objects that were requested.
        tensor_data (list): TensorData objects returned by read_tensors(),
            parallel to ``tensor_info``.
    """
    for i, (info, data) in enumerate(zip(tensor_info, tensor_data)):
        print("-----------------------------------------------------------")
        print("tensor_info_" + str(i + 1) + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()
        print("tensor_data_" + str(i + 1) + " attributes:")
        # Compare the C++-reported byte size with the python buffer size
        # BEFORE reading, so a mismatch is reported instead of np.frombuffer
        # raising ValueError on a too-large count.
        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, min(c_byte_size, py_byte_size)))
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)
| if __name__ == "__main__": | |||
| main() | |||
| @@ -0,0 +1,33 @@ | |||
| ----------------------------------------------------------- | |||
| watchpoint_hit for test_1 attributes: | |||
| name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168 | |||
| slot = 0 | |||
| condition = 6 | |||
| watchpoint_id = 1 | |||
| parameter 0 name = param | |||
| parameter 0 disabled = False | |||
| parameter 0 value = 0.0 | |||
| parameter 0 hit = True | |||
| parameter 0 actual_value = -0.14013671875 | |||
| error code = 0 | |||
| device_id = 0 | |||
| root_graph_id = 0 | |||
| ----------------------------------------------------------- | |||
| watchpoint_hit for test_4 attributes: | |||
| name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias | |||
| slot = 0 | |||
| condition = 18 | |||
| watchpoint_id = 3 | |||
| parameter 0 name = abs_mean_update_ratio_gt | |||
| parameter 0 disabled = False | |||
| parameter 0 value = 0.0 | |||
| parameter 0 hit = True | |||
| parameter 0 actual_value = 0.5243796973599475 | |||
| parameter 1 name = epsilon | |||
| parameter 1 disabled = True | |||
| parameter 1 value = 0.0 | |||
| parameter 1 hit = False | |||
| parameter 1 actual_value = 0.0 | |||
| error code = 0 | |||
| device_id = 0 | |||
| root_graph_id = 0 | |||
| @@ -0,0 +1,109 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Watchpoints test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
def main():
    """Exercise add/check/remove watchpoint offline-debugger APIs.

    Runs four scenarios (hit, removed-not-hit, set-not-hit, weight-change hit)
    against an asynchronous-dump AlexNet run.
    NOTE(review): the dump path is machine-specific; this script only runs
    where that dump exists.
    """
    debugger_backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/sync_trans_false/032421/alexnet")
    _ = debugger_backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)
    # NOTES:
    # -> watch_condition=6 is MIN_LT
    # -> watch_condition=18 is CHANGE_TOO_LARGE
    # test 1: watchpoint set and hit (watch_condition=6)
    param1 = d.Parameter(name="param", disabled=False, value=0.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                                                         "Conv2D-op168":
                                                         {"device_id": [0], "root_graph_id": [0], "is_parameter": False
                                                          }}, parameter_list=[param1])
    watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
    if len(watchpoint_hits_test_1) != 1:
        print("ERROR -> test 1: watchpoint set but not hit just once")
    print_watchpoint_hits(watchpoint_hits_test_1, 1)
    # test 2: watchpoint remove and ensure it's not hit
    _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
    watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
    if watchpoint_hits_test_2:
        print("ERROR -> test 2: watchpoint removed but hit")
    # test 3: watchpoint set and not hit, then remove
    # (value=-1000.0 makes MIN_LT impossible to trigger on this dump)
    param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                                                         "Conv2D-op308":
                                                         {"device_id": [0], "root_graph_id": [0], "is_parameter": False
                                                          }}, parameter_list=[param2])
    watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
    if watchpoint_hits_test_3:
        print("ERROR -> test 3: watchpoint set but not supposed to be hit")
    _ = debugger_backend.remove_watchpoint(watchpoint_id=2)
    # test 4: weight change watchpoint set and hit
    param_abs_mean_update_ratio_gt = d.Parameter(
        name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
    param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                                                         "Parameter[6]_11/fc3.bias":
                                                         {"device_id": [0], "root_graph_id": [0], "is_parameter": True
                                                          }}, parameter_list=[param_abs_mean_update_ratio_gt,
                                                                              param_epsilon])
    watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
    if len(watchpoint_hits_test_4) != 1:
        print("ERROR -> test 4: watchpoint weight change set but not hit just once")
    print_watchpoint_hits(watchpoint_hits_test_4, 4)
def print_watchpoint_hits(watchpoint_hits, test_id):
    """Print every watchpoint hit, including its per-condition parameters."""
    for hit in watchpoint_hits:
        print("-----------------------------------------------------------")
        print("watchpoint_hit for test_%u attributes:" % test_id)
        print("name = ", hit.name)
        print("slot = ", hit.slot)
        print("condition = ", hit.condition)
        print("watchpoint_id = ", hit.watchpoint_id)
        for idx, param in enumerate(hit.parameters):
            print("parameter ", idx, " name = ", param.name)
            print("parameter ", idx, " disabled = ", param.disabled)
            print("parameter ", idx, " value = ", param.value)
            print("parameter ", idx, " hit = ", param.hit)
            print("parameter ", idx, " actual_value = ", param.actual_value)
        print("error code = ", hit.error_code)
        print("device_id = ", hit.device_id)
        print("root_graph_id = ", hit.root_graph_id)
| if __name__ == "__main__": | |||
| main() | |||
| @@ -0,0 +1,70 @@ | |||
| ----------------------------------------------------------- | |||
| tensor_info_1 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = True | |||
| tensor_data_1 attributes: | |||
| data (printed in uint8) = [230 208 10 52 104 34 252 52 4 231 144 52 188 150 64 180 88 236 | |||
| 15 180 254 135 180 51 131 226 147 52 88 202 62 53 2 43 55 53 | |||
| 231 29 87 180 220 249 30 180 157 17 177 180 81 107 140 181 8 95 | |||
| 192 180 89 134 112 180 96 238 90 178 156 196 212 180 206 25 15 181 | |||
| 212 154 6 180 91 211 116 52 191 14 140 51 128 106 124 53 28 158 | |||
| 70 181 182 21 251 50 100 204 157 179 88 202 42 180 7 95 8 53 | |||
| 128 251 238 52 241 133 241 52 111 86 157 179 48 221 148 180 200 7 | |||
| 141 180 236 226 182 51 190 82 158 180 140 108 179 180 195 134 215 179 | |||
| 103 213 39 179 89 168 149 180 42 58 58 180 64 53 62 179 250 126 | |||
| 158 52 38 83 117 52 0 0 136 180 136 133 122 51 110 18 131 179 | |||
| 238 13 94 51 102 136 15 181 134 90 227 180 16 11 117 180 35 74 | |||
| 163 52 105 0 87 181 112 18 131 50 226 233 67 181 217 172 10 52 | |||
| 206 25 217 52 208 213 22 52 146 203 87 180 74 46 207 52 178 191 | |||
| 4 180 100 93 216 52 119 190 171 180 223 2 5 181 128 72 207 179 | |||
| 58 146 11 179 224 79 137 52 143 228 154 180 246 219 215 179 14 79 | |||
| 195 52 126 29 64 52 132 192 42 51 94 220 86 52 94 109 1 181 | |||
| 72 37 117 178 110 197 94 180 160 94 153 179 118 224 80 181 156 17 | |||
| 37 50 120 156 162 53 26 115 135 180 228 20 29 53 145 126 147 52 | |||
| 99 16 48 180 211 188 199 180 52 51 99 180 93 254 227 52 152 126 | |||
| 123 49 6 18 16 181 5 163 130 51 27 158 98 53 134 235 189 52 | |||
| 119 45 9 180 130 115 110 52 158 128 162 52 232 251 197 180 178 46 | |||
| 158 179 57 214 157 52 172 207 161 180 208 0 222 49 242 99 32 53 | |||
| 20 174 135 50 247 117 176 52 194 57 43 180 140 108 135 51 243 65 | |||
| 175 51 187 73 156 51 63 232 217 50 180 234 115 52 194 168 148 52 | |||
| 27 192 183 180 45 178 157 52 125 208 17 53 236 192 65 53 190 193 | |||
| 7 53 254 246 57 53 3 43 199 51 64 164 215 180 220 104 240 51 | |||
| 23 72 24 180 68 173 9 51 72 114 29 53 105 0 57 181 188 150 | |||
| 8 53 229 97 131 53 0 34 189 51 163 146 74 53 31 244 204 51 | |||
| 86 193 220 180 156 51 146 179] | |||
| size in bytes = 512 | |||
| debugger dtype = 11 | |||
| shape = [128] | |||
| ----------------------------------------------------------- | |||
| tensor_info_2 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op171 | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_2 attributes: | |||
| data (printed in uint8) = [ 99 26 69 ... 154 218 164] | |||
| size in bytes = 2076672 | |||
| debugger dtype = 10 | |||
| shape = [32, 192, 13, 13] | |||
| ----------------------------------------------------------- | |||
| tensor_info_3 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op353 | |||
| slot = 1 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_3 attributes: | |||
| data (printed in uint8) = [19 17 27 ... 94 42 90] | |||
| size in bytes = 129792 | |||
| debugger dtype = 6 | |||
| shape = [32, 12, 13, 13, 2] | |||
| @@ -0,0 +1,74 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Read tensor test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
| import numpy as np | |||
def main():
    """Read three tensors from a synchronous-dump AlexNet run and print them.

    Exercises DbgServices.read_tensors() on a parameter, a zero-slot output
    tensor and a non-zero-slot output tensor.
    NOTE(review): the dump path is machine-specific; this script only runs
    where that dump exists.
    """
    debugger_backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/sync_trans_true/032421/alexnet")
    _ = debugger_backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)
    # parameter
    info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                         slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True)
    # output tensor with zero slot
    info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op171",
                         slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
    # output tensor with non-zero slot
    info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op353",
                         slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
    tensor_info = [info1, info2, info3]
    tensor_data = debugger_backend.read_tensors(tensor_info)
    print_read_tensors(tensor_info, tensor_data)
def print_read_tensors(tensor_info, tensor_data):
    """Print the attributes of each requested tensor and its returned data.

    Args:
        tensor_info (list): TensorInfo objects that were requested.
        tensor_data (list): TensorData objects returned by read_tensors(),
            parallel to ``tensor_info``.
    """
    for i, (info, data) in enumerate(zip(tensor_info, tensor_data)):
        print("-----------------------------------------------------------")
        print("tensor_info_" + str(i + 1) + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()
        print("tensor_data_" + str(i + 1) + " attributes:")
        # Compare the C++-reported byte size with the python buffer size
        # BEFORE reading, so a mismatch is reported instead of np.frombuffer
        # raising ValueError on a too-large count.
        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, min(c_byte_size, py_byte_size)))
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)
| if __name__ == "__main__": | |||
| main() | |||
| @@ -0,0 +1,261 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#include "debugger/offline_debug/dbg_services.h"

#include <algorithm>
#include <cstdlib>
| DbgServices::DbgServices(bool verbose) { | |||
| DbgLogger::verbose = verbose; | |||
| char *dbg_log_path = getenv("OFFLINE_DBG_LOG"); | |||
| if (dbg_log_path != NULL) { | |||
| DbgLogger::verbose = true; | |||
| } | |||
| debug_services = new DebugServices(); | |||
| } | |||
// Copy constructor: deep-copies the owned DebugServices backend so the two
// objects never share (and never double-delete) the same backend instance.
DbgServices::DbgServices(const DbgServices &other) {
  MS_LOG(INFO) << "cpp DbgServices object is created via copy";
  debug_services = new DebugServices(*other.debug_services);
}
| DbgServices &DbgServices::operator=(const DbgServices &other) { | |||
| MS_LOG(INFO) << "cpp DbgServices object is being assigned a different state"; | |||
| if (this != &other) { | |||
| delete debug_services; | |||
| debug_services = new DebugServices(*other.debug_services); | |||
| } | |||
| return *this; | |||
| } | |||
// Destructor: releases the backend allocated in the constructors.
DbgServices::~DbgServices() {
  MS_LOG(INFO) << "cpp DbgServices object is deleted";
  delete debug_services;
}
| std::string DbgServices::GetVersion() { | |||
| MS_LOG(INFO) << "get version is called"; | |||
| return "1.2.0"; | |||
| } | |||
// Configure the backend with the network name, the dump directory and the
// dump mode (sync/async) before any read/watchpoint call is made.
// Always returns 0 (no failure path is reported to python).
int32_t DbgServices::Initialize(std::string net_name, std::string dump_folder_path, bool is_sync_mode) {
  MS_LOG(INFO) << "cpp DbgServices initialize network name " << net_name;
  MS_LOG(INFO) << "cpp DbgServices initialize dump folder path " << dump_folder_path;
  MS_LOG(INFO) << "cpp DbgServices initialize sync mode " << is_sync_mode;
  debug_services->SetNetName(net_name);
  debug_services->SetDumpDir(dump_folder_path);
  debug_services->SetSyncMode(is_sync_mode);
  return 0;
}
| int32_t DbgServices::AddWatchpoint( | |||
| unsigned int id, unsigned int watch_condition, | |||
| std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes, | |||
| std::vector<parameter_t> parameter_list) { | |||
| MS_LOG(INFO) << "cpp start"; | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint id " << id; | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint watch_condition " << watch_condition; | |||
| for (auto const &node : check_nodes) { | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint name " << node.first; | |||
| auto attr_map = node.second; | |||
| bool is_parameter = std::get<bool>(attr_map["is_parameter"]); | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint is_parameter " << is_parameter; | |||
| // std::vector<uint32_t> device_id = std::get<std::vector<uint32_t>>(attr_map["device_id"]); | |||
| std::vector<std::string> device_id_str = std::get<std::vector<std::string>>(attr_map["device_id"]); | |||
| std::vector<std::uint32_t> device_id; | |||
| std::transform(device_id_str.begin(), device_id_str.end(), std::back_inserter(device_id), | |||
| [](std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); }); | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint device_id "; | |||
| for (auto const &i : device_id) { | |||
| MS_LOG(INFO) << i << " "; | |||
| } | |||
| // std::vector<uint32_t> root_graph_id = std::get<std::vector<uint32_t>>(attr_map["root_graph_id"]); | |||
| std::vector<std::string> root_graph_id_str = std::get<std::vector<std::string>>(attr_map["root_graph_id"]); | |||
| std::vector<std::uint32_t> root_graph_id; | |||
| std::transform( | |||
| root_graph_id_str.begin(), root_graph_id_str.end(), std::back_inserter(root_graph_id), | |||
| [](std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); }); | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint root_graph_id"; | |||
| for (auto const &j : root_graph_id) { | |||
| MS_LOG(INFO) << j << " "; | |||
| } | |||
| } | |||
| for (auto const ¶meter : parameter_list) { | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint parameter name " << parameter.name; | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint parameter disabled " << parameter.disabled; | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint parameter value " << parameter.value; | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint parameter hit " << parameter.hit; | |||
| MS_LOG(INFO) << "cpp DbgServices AddWatchpoint parameter actual_value " << parameter.actual_value; | |||
| } | |||
| std::vector<std::tuple<std::string, bool>> check_node_list; | |||
| std::vector<std::tuple<std::string, std::vector<uint32_t>>> check_node_device_list; | |||
| std::vector<std::tuple<std::string, std::vector<uint32_t>>> check_node_graph_list; | |||
| std::vector<DebugServices::parameter_t> parameter_list_backend; | |||
| std::transform(check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_list), | |||
| [](auto &node) -> std::tuple<std::string, bool> { | |||
| auto attr_map = node.second; | |||
| return std::make_tuple(node.first, std::get<bool>(attr_map["is_parameter"])); | |||
| }); | |||
| std::transform(check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_device_list), | |||
| [](auto &node) -> std::tuple<std::string, std::vector<uint32_t>> { | |||
| auto attr_map = node.second; | |||
| std::vector<std::string> device_id_str = std::get<std::vector<std::string>>(attr_map["device_id"]); | |||
| std::vector<std::uint32_t> device_id; | |||
| std::transform( | |||
| device_id_str.begin(), device_id_str.end(), std::back_inserter(device_id), | |||
| [](std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); }); | |||
| return std::make_tuple(node.first, device_id); | |||
| }); | |||
| std::transform( | |||
| check_nodes.begin(), check_nodes.end(), std::back_inserter(check_node_graph_list), | |||
| [](auto &node) -> std::tuple<std::string, std::vector<uint32_t>> { | |||
| auto attr_map = node.second; | |||
| std::vector<std::string> root_graph_id_str = std::get<std::vector<std::string>>(attr_map["root_graph_id"]); | |||
| std::vector<std::uint32_t> root_graph_id; | |||
| std::transform( | |||
| root_graph_id_str.begin(), root_graph_id_str.end(), std::back_inserter(root_graph_id), | |||
| [](std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); }); | |||
| return std::make_tuple(node.first, root_graph_id); | |||
| }); | |||
| std::transform( | |||
| parameter_list.begin(), parameter_list.end(), std::back_inserter(parameter_list_backend), | |||
| [](const parameter_t ¶meter) -> DebugServices::parameter_t { | |||
| return DebugServices::parameter_t{parameter.name, parameter.disabled, parameter.value, parameter.hit}; | |||
| }); | |||
| debug_services->AddWatchpoint(id, watch_condition, 0, check_node_list, parameter_list_backend, | |||
| &check_node_device_list, &check_node_graph_list); | |||
| MS_LOG(INFO) << "cpp end"; | |||
| return 0; | |||
| } | |||
// Remove a previously added watchpoint by id.  Always returns 0.
int32_t DbgServices::RemoveWatchpoint(unsigned int id) {
  MS_LOG(INFO) << "cpp DbgServices RemoveWatchpoint id " << id;
  debug_services->RemoveWatchpoint(id);
  return 0;
}
// Evaluate all registered watchpoints against the dumped tensors of one
// iteration.  The backend reports results through parallel output vectors;
// these are re-packed into python-facing watchpoint_hit_t objects.
std::vector<watchpoint_hit_t> DbgServices::CheckWatchpoints(unsigned int iteration) {
  MS_LOG(INFO) << "cpp DbgServices CheckWatchpoint iteration " << iteration;
  // Parallel output vectors filled by DebugServices::CheckWatchpoints:
  // entry i of each vector describes the i-th hit.
  std::vector<std::string> name;
  std::vector<std::string> slot;
  std::vector<int> condition;
  std::vector<unsigned int> watchpoint_id;
  std::vector<std::string> overflow_ops;
  std::vector<std::vector<DebugServices::parameter_t>> parameters;
  std::vector<int32_t> error_codes;
  std::vector<unsigned int> device_id;
  std::vector<unsigned int> root_graph_id;
  // Overflow detection is disabled here for offline use; kept for reference.
  // #ifdef ENABLE_D
  // overflow_ops = CheckOpOverflow();
  // #endif
  std::vector<std::shared_ptr<TensorData>> tensor_list;
  tensor_list = debug_services->ReadNeededDumpedTensors(iteration);
  debug_services->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, &parameters, &error_codes, overflow_ops,
                                   &tensor_list, false, true, true, &device_id, &root_graph_id);
  std::vector<watchpoint_hit_t> hits;
  for (unsigned int i = 0; i < name.size(); i++) {
    // Convert the backend parameter structs of the i-th hit to the API type.
    std::vector<DebugServices::parameter_t> &parameter = parameters[i];
    std::vector<parameter_t> api_parameter_vector;
    for (const auto &p : parameter) {
      parameter_t api_parameter(p.name, p.disabled, p.value, p.hit, p.actual_value);
      api_parameter_vector.push_back(api_parameter);
    }
    // slot arrives as a string from the backend; convert to int here.
    watchpoint_hit_t hit(name[i], std::stoi(slot[i]), condition[i], watchpoint_id[i], api_parameter_vector,
                         error_codes[i], device_id[i], root_graph_id[i]);
    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t name " << hit.name;
    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t slot " << hit.slot;
    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t watchpoint_id " << hit.watchpoint_id;
    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t error_code " << hit.error_code;
    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t device_id " << hit.device_id;
    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t root_graph_id " << hit.root_graph_id;
    for (auto const &parameter_i : api_parameter_vector) {
      MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter name " << parameter_i.name;
      MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter disabled " << parameter_i.disabled;
      MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter value " << parameter_i.value;
      MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter hit " << parameter_i.hit;
      MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter actual_value " << parameter_i.actual_value;
    }
    hits.push_back(hit);
  }
  return hits;
}
| std::string GetTensorFullName(tensor_info_t info) { | |||
| std::string node_name = info.node_name; | |||
| if (info.is_parameter) { | |||
| // scopes in node name are separated by '/' | |||
| // use the name without scope if truncate is true | |||
| std::size_t found = node_name.find_last_of("/"); | |||
| node_name = node_name.substr(found + 1); | |||
| } | |||
| return node_name + ":" + std::to_string(info.slot); | |||
| } | |||
| unsigned int GetTensorDeviceId(tensor_info_t info) { return info.device_id; } | |||
| unsigned int GetTensorRootGraphId(tensor_info_t info) { return info.root_graph_id; } | |||
| unsigned int GetTensorIteration(tensor_info_t info) { return info.iteration; } | |||
| unsigned int GetTensorSlot(tensor_info_t info) { return info.slot; } | |||
| std::vector<tensor_data_t> DbgServices::ReadTensors(std::vector<tensor_info_t> info) { | |||
| for (auto i : info) { | |||
| MS_LOG(INFO) << "cpp DbgServices ReadTensor info name " << i.node_name << ", slot " << i.slot << ", iteration " | |||
| << i.iteration << ", device_id " << i.device_id << ", root_graph_id " << i.root_graph_id; | |||
| } | |||
| std::vector<std::string> backend_name; | |||
| std::vector<unsigned int> device_id; | |||
| std::vector<unsigned int> root_graph_id; | |||
| std::vector<unsigned int> iteration; | |||
| std::vector<size_t> slot; | |||
| std::vector<std::shared_ptr<TensorData>> result_list; | |||
| std::vector<tensor_data_t> tensors_read; | |||
| std::transform(info.begin(), info.end(), std::back_inserter(backend_name), GetTensorFullName); | |||
| std::transform(info.begin(), info.end(), std::back_inserter(slot), GetTensorSlot); | |||
| std::transform(info.begin(), info.end(), std::back_inserter(device_id), GetTensorDeviceId); | |||
| std::transform(info.begin(), info.end(), std::back_inserter(root_graph_id), GetTensorRootGraphId); | |||
| std::transform(info.begin(), info.end(), std::back_inserter(iteration), GetTensorIteration); | |||
| MS_LOG(INFO) << "cpp before"; | |||
| debug_services->ReadDumpedTensor(backend_name, slot, device_id, iteration, root_graph_id, &result_list); | |||
| MS_LOG(INFO) << "cpp after"; | |||
| for (auto result : result_list) { | |||
| tensor_data_t tensor_data_item(result->GetDataPtr(), result->GetByteSize(), result->GetType(), result->GetShape()); | |||
| tensors_read.push_back(tensor_data_item); | |||
| } | |||
| MS_LOG(INFO) << "cpp end"; | |||
| return tensors_read; | |||
| } | |||
| @@ -0,0 +1,149 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DEBUG_DBG_SERVICES_H_ | |||
| #define DEBUG_DBG_SERVICES_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include <map> | |||
| #include <memory> | |||
| #include <tuple> | |||
| #include <iostream> | |||
| #include <variant> | |||
| #include "pybind11/pybind11.h" | |||
| #include "pybind11/stl.h" | |||
| #include "pybind11/stl_bind.h" | |||
| #include "debug/debug_services.h" | |||
| namespace py = pybind11; | |||
// Watchpoint condition parameter as exposed to python.  Mirrors
// DebugServices::parameter_t plus the actual value observed on a hit.
typedef struct parameter {
  parameter(const std::string &name, bool disabled, double value, bool hit, double actual_value)
      : name(name), disabled(disabled), value(value), hit(hit), actual_value(actual_value) {}
  // Accessors used by the pybind11 property bindings.
  // Top-level const on a by-value return type has no effect, so it is omitted.
  std::string get_name() const { return name; }
  bool get_disabled() const { return disabled; }
  double get_value() const { return value; }
  bool get_hit() const { return hit; }
  double get_actual_value() const { return actual_value; }
  std::string name;     // parameter name, e.g. "param" or "abs_mean_update_ratio_gt"
  bool disabled;        // true if this parameter takes no part in the check
  double value;         // threshold supplied by the user
  bool hit;             // true if this parameter triggered the watchpoint
  double actual_value;  // value observed when the watchpoint was checked
} parameter_t;
| typedef struct watchpoint_hit { | |||
| watchpoint_hit(const std::string &name, uint32_t slot, int condition, uint32_t watchpoint_id, | |||
| const std::vector<parameter_t> ¶meters, int32_t error_code, uint32_t device_id, | |||
| uint32_t root_graph_id) | |||
| : name(name), | |||
| slot(slot), | |||
| condition(condition), | |||
| watchpoint_id(watchpoint_id), | |||
| parameters(parameters), | |||
| error_code(error_code), | |||
| device_id(device_id), | |||
| root_graph_id(root_graph_id) {} | |||
| const std::string get_name() const { return name; } | |||
| const uint32_t get_slot() const { return slot; } | |||
| const int get_condition() const { return condition; } | |||
| const uint32_t get_watchpoint_id() const { return watchpoint_id; } | |||
| const std::vector<parameter_t> get_parameters() const { return parameters; } | |||
| const int32_t get_error_code() const { return error_code; } | |||
| const uint32_t get_device_id() const { return device_id; } | |||
| const uint32_t get_root_graph_id() const { return root_graph_id; } | |||
| std::string name; | |||
| uint32_t slot; | |||
| int condition; | |||
| uint32_t watchpoint_id; | |||
| std::vector<parameter_t> parameters; | |||
| int32_t error_code; | |||
| uint32_t device_id; | |||
| uint32_t root_graph_id; | |||
| } watchpoint_hit_t; | |||
// Python-facing identification of one tensor in the dump.
typedef struct tensor_info {
  tensor_info(const std::string &node_name, uint32_t slot, uint32_t iteration, uint32_t device_id,
              uint32_t root_graph_id, bool is_parameter)
      : node_name(node_name),
        slot(slot),
        iteration(iteration),
        device_id(device_id),
        root_graph_id(root_graph_id),
        is_parameter(is_parameter) {}
  // Accessors used by the pybind11 property bindings.
  // Top-level const on a by-value return type has no effect, so it is omitted.
  std::string get_node_name() const { return node_name; }
  uint32_t get_slot() const { return slot; }
  uint32_t get_iteration() const { return iteration; }
  uint32_t get_device_id() const { return device_id; }
  uint32_t get_root_graph_id() const { return root_graph_id; }
  bool get_is_parameter() const { return is_parameter; }
  std::string node_name;   // full scoped node name
  uint32_t slot;           // output slot
  uint32_t iteration;      // dump iteration to read
  uint32_t device_id;      // device the tensor was dumped from
  uint32_t root_graph_id;  // root graph the node belongs to
  bool is_parameter;       // true if the tensor is a network parameter
} tensor_info_t;
| typedef struct tensor_data { | |||
| tensor_data(char *data_ptr, uint64_t data_size, int dtype, const std::vector<int64_t> &shape) | |||
| : data_size(data_size), dtype(dtype), shape(shape) { | |||
| if (data_ptr != NULL) { | |||
| this->data_ptr = py::bytes(data_ptr, data_size); | |||
| } else { | |||
| this->data_ptr = py::bytes(); | |||
| } | |||
| } | |||
| const py::bytes get_data_ptr() const { return data_ptr; } | |||
| const uint64_t get_data_size() const { return data_size; } | |||
| const int get_dtype() const { return dtype; } | |||
| const std::vector<int64_t> &get_shape() const { return shape; } | |||
| py::bytes data_ptr; | |||
| uint64_t data_size; | |||
| int dtype; | |||
| std::vector<int64_t> shape; | |||
| } tensor_data_t; | |||
// Offline debugger service: thin wrapper around a DebugServices backend,
// exposed to Python via pybind11 (see dbg_services.py).
class DbgServices {
 private:
  DebugServices *debug_services;  // raw owning pointer; lifetime managed by the special members below

 public:
  // verbose: enable debug logging in the backend.
  explicit DbgServices(bool verbose = false);
  DbgServices(const DbgServices &other);
  DbgServices &operator=(const DbgServices &other);
  ~DbgServices();
  // Point the service at the dump folder of a network run.
  // is_sync_mode selects synchronous vs asynchronous dump-file layout.
  int32_t Initialize(std::string net_name, std::string dump_folder_path, bool is_sync_mode);
  // Register a watchpoint: check_nodes maps node name -> attribute map
  // ("device_id"/"root_graph_id" lists, "is_parameter" bool).
  int32_t AddWatchpoint(
    unsigned int id, unsigned int watch_condition,
    std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes,
    std::vector<parameter_t> parameter_list);
  int32_t RemoveWatchpoint(unsigned int id);
  // Evaluate all registered watchpoints against the dump of `iteration`.
  std::vector<watchpoint_hit_t> CheckWatchpoints(unsigned int iteration);
  // Fetch the tensors described by `info` from the dump files.
  std::vector<tensor_data_t> ReadTensors(std::vector<tensor_info_t> info);
  std::string GetVersion();
};
| #endif // DEBUG_DBG_SERVICES_H_ | |||
| @@ -0,0 +1,865 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| The module DbgServices provides offline debugger APIs. | |||
| """ | |||
| import mindspore._mindspore_offline_debug as cds | |||
| from mi_validators import check_init, check_initialize, check_add_watchpoint, check_remove_watchpoint, check_check_watchpoints, check_read_tensors, check_initialize_done, check_tensor_info_init, check_tensor_data_init, check_watchpoint_hit_init, check_parameter_init | |||
def get_version():
    """
    Return the offline Debug Services version string.

    Returns:
        version (str): dbgServices version.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> version = dbg_services.get_version()
    """
    dbg_instance = cds.DbgServices(False)
    return dbg_instance.GetVersion()
class DbgLogger:
    """
    Offline Debug Services logger.

    Args:
        verbose (bool): Whether messages passed to the logger are printed.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> version = dbg_services.DbgLogger(verbose=False)
    """
    def __init__(self, verbose):
        self.verbose = verbose

    def __call__(self, *logs):
        # Silently drop everything unless verbose mode is on; the positional
        # arguments are printed as a single tuple.
        if not self.verbose:
            return
        print(logs)


log = DbgLogger(False)
class DbgServices():
    """
    Offline Debug Services class.

    Args:
        dump_file_path (str): Directory where the dump files are saved.
        verbose (bool): Whether to print logs (default: False).

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
        >>>                              verbose=True)
    """

    @check_init
    def __init__(self, dump_file_path, verbose=False):
        log.verbose = verbose
        log("in Python __init__, file path is ", dump_file_path)
        self.dump_file_path = dump_file_path
        self.dbg_instance = cds.DbgServices(verbose)
        self.version = self.dbg_instance.GetVersion()
        self.verbose = verbose
        # Flipped to True by initialize(); checked by @check_initialize_done.
        self.initialized = False

    @check_initialize
    def initialize(self, net_name, is_sync_mode=True):
        """
        Initialize Debug Service.

        Args:
            net_name (str): Network name.
            is_sync_mode (bool): Whether to process synchronous or asynchronous
                dump files mode (default: True (synchronous)).

        Returns:
            Initialized Debug Service instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
            >>>                              verbose=True)
            >>> d_init = d.initialize(net_name="network name", is_sync_mode=True)
        """
        log("in Python Initialize dump_file_path ", self.dump_file_path)
        self.initialized = True
        return self.dbg_instance.Initialize(net_name, self.dump_file_path, is_sync_mode)

    @check_initialize_done
    @check_add_watchpoint
    def add_watchpoint(self, watchpoint_id, watch_condition, check_node_list, parameter_list):
        """
        Adding watchpoint to Debug Service instance.

        Args:
            watchpoint_id (int): Watchpoint id.
            watch_condition (int): A representation of the condition to be checked.
            check_node_list (dict): Dictionary of node names (str) as key,
                mapping to device_id (list of ints), root_graph_id (list of ints)
                and is_parameter (bool).
            parameter_list (list): List of parameters in watchpoint. Parameters
                should be instances of Parameter class. Each parameter describes
                the value to be checked in watchpoint.

        Returns:
            Debug Service instance with added watchpoint.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
            >>>                              verbose=True)
            >>> d_init = d.initialize(is_sync_mode=True)
            >>> d_wp = d_init.add_watchpoint(watchpoint_id=1,
            >>>                              watch_condition=6,
            >>>                              check_node_list={"conv2.bias" : {"device_id": [0],
            >>>                                               "root_graph_id": [0], "is_parameter": True}},
            >>>                              parameter_list=[dbg_services.Parameter(name="param",
            >>>                                                                    disabled=False,
            >>>                                                                    value=0.0,
            >>>                                                                    hit=False,
            >>>                                                                    actual_value=0.0)])
        """
        log("in Python AddWatchpoint")
        # Unwrap the pybind instances from the Parameter wrapper objects
        # before crossing into the C++ layer.
        parameter_list_inst = [elem.instance for elem in parameter_list]
        return self.dbg_instance.AddWatchpoint(watchpoint_id, watch_condition, check_node_list, parameter_list_inst)

    @check_initialize_done
    @check_remove_watchpoint
    def remove_watchpoint(self, watchpoint_id):
        """
        Removing watchpoint from Debug Service instance.

        Args:
            watchpoint_id (int): Watchpoint id.

        Returns:
            Debug Service instance with removed watchpoint.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
            >>>                              verbose=True)
            >>> d_init = d.initialize(is_sync_mode=True)
            >>> d_wp = d_init.add_watchpoint(watchpoint_id=1,
            >>>                              watch_condition=6,
            >>>                              check_node_list={"conv2.bias" : {"device_id": [5],
            >>>                                               "root_graph_id": [0], "is_parameter": True}},
            >>>                              parameter_list=[dbg_services.Parameter(name="param",
            >>>                                                                    disabled=False,
            >>>                                                                    value=0.0,
            >>>                                                                    hit=False,
            >>>                                                                    actual_value=0.0)])
            >>> d_wp = d_wp.remove_watchpoint(watchpoint_id=1)
        """
        log("in Python Remove Watchpoint id ", watchpoint_id)
        return self.dbg_instance.RemoveWatchpoint(watchpoint_id)

    @check_initialize_done
    @check_check_watchpoints
    def check_watchpoints(self, iteration):
        """
        Checking watchpoint at given iteration.

        Args:
            iteration (int): Watchpoint check iteration.

        Returns:
            Watchpoint hit list.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
            >>>                              verbose=True)
            >>> d_init = d.initialize(is_sync_mode=True)
            >>> d_wp = d_init.add_watchpoint(watchpoint_id=1,
            >>>                              watch_condition=6,
            >>>                              check_node_list={"conv2.bias" : {"device_id": [5],
            >>>                                               "root_graph_id": [0], "is_parameter": True}},
            >>>                              parameter_list=[dbg_services.Parameter(name="param",
            >>>                                                                    disabled=False,
            >>>                                                                    value=0.0,
            >>>                                                                    hit=False,
            >>>                                                                    actual_value=0.0)])
            >>> watchpoints = d_wp.check_watchpoints(iteration=8)
        """
        log("in Python CheckWatchpoints iteration ", iteration)
        watchpoint_list = self.dbg_instance.CheckWatchpoints(iteration)
        watchpoint_hit_list = []
        for watchpoint in watchpoint_list:
            # Rewrap the pybind parameter objects into Python Parameter
            # instances so callers only ever see the Python-level API.
            param_list = [Parameter(param.get_name(),
                                    param.get_disabled(),
                                    param.get_value(),
                                    param.get_hit(),
                                    param.get_actual_value())
                          for param in watchpoint.get_parameters()]
            watchpoint_hit_list.append(WatchpointHit(watchpoint.get_name(),
                                                     watchpoint.get_slot(),
                                                     watchpoint.get_condition(),
                                                     watchpoint.get_watchpoint_id(),
                                                     param_list,
                                                     watchpoint.get_error_code(),
                                                     watchpoint.get_device_id(),
                                                     watchpoint.get_root_graph_id()))
        return watchpoint_hit_list

    @check_initialize_done
    @check_read_tensors
    def read_tensors(self, info):
        """
        Returning tensor data object describing the requested tensor.

        Args:
            info (list): List of TensorInfo objects.

        Returns:
            TensorData list (list).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
            >>>                              verbose=True)
            >>> d_init = d.initialize(is_sync_mode=True)
            >>> tensor_data_list = d_init.read_tensors([dbg_services.TensorInfo(node_name="conv2.bias",
            >>>                                                                 slot=0,
            >>>                                                                 iteration=8,
            >>>                                                                 device_id=5,
            >>>                                                                 root_graph_id=0,
            >>>                                                                 is_parameter=True)])
        """
        log("in Python ReadTensors info ", info)
        info_list_inst = [elem.instance for elem in info]
        tensor_data_list = self.dbg_instance.ReadTensors(info_list_inst)
        tensor_data_list_ret = []
        for elem in tensor_data_list:
            # A tensor with no data comes back with size 0; normalize its
            # payload to an empty bytes object.
            if elem.get_data_size() == 0:
                tensor_data = TensorData(b'', elem.get_data_size(), elem.get_dtype(), elem.get_shape())
            else:
                tensor_data = TensorData(elem.get_data_ptr(), elem.get_data_size(), elem.get_dtype(), elem.get_shape())
            tensor_data_list_ret.append(tensor_data)
        return tensor_data_list_ret
class TensorInfo():
    """
    Tensor Information class.

    Args:
        node_name (str): Fully qualified name of the desired node.
        slot (int): The particular output for the requested node.
        iteration (int): The desired iteration to gather tensor information.
        device_id (int): The desired device id to gather tensor information.
        root_graph_id (int): The root graph id of the desired node.
        is_parameter (bool): Whether node is a parameter (input, constant, bias, parameter).

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> tensor_info = dbg_services.TensorInfo(node_name="conv2.bias",
        >>>                                       slot=0,
        >>>                                       iteration=8,
        >>>                                       device_id=5,
        >>>                                       root_graph_id=0,
        >>>                                       is_parameter=True)
    """
    @check_tensor_info_init
    def __init__(self, node_name, slot, iteration, device_id, root_graph_id, is_parameter):
        # Wrap the pybind-exported tensor_info struct; `instance` is what is
        # handed back to the C++ layer by DbgServices.read_tensors().
        self.instance = cds.tensor_info(node_name, slot, iteration, device_id, root_graph_id, is_parameter)

    @property
    def node_name(self):
        """
        Function to receive TensorInfo node_name.

        Returns:
            node_name of TensorInfo instance (str).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_info = dbg_services.TensorInfo(node_name="conv2.bias",
            >>>                                       slot=0,
            >>>                                       iteration=8,
            >>>                                       device_id=5,
            >>>                                       root_graph_id=0,
            >>>                                       is_parameter=True)
            >>> name = tensor_info.node_name
        """
        return self.instance.get_node_name()

    @property
    def slot(self):
        """
        Function to receive TensorInfo slot.

        Returns:
            slot of TensorInfo instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_info = dbg_services.TensorInfo(node_name="conv2.bias",
            >>>                                       slot=0,
            >>>                                       iteration=8,
            >>>                                       device_id=5,
            >>>                                       root_graph_id=0,
            >>>                                       is_parameter=True)
            >>> slot = tensor_info.slot
        """
        return self.instance.get_slot()

    @property
    def iteration(self):
        """
        Function to receive TensorInfo iteration.

        Returns:
            iteration of TensorInfo instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_info = dbg_services.TensorInfo(node_name="conv2.bias",
            >>>                                       slot=0,
            >>>                                       iteration=8,
            >>>                                       device_id=5,
            >>>                                       root_graph_id=0,
            >>>                                       is_parameter=True)
            >>> iteration = tensor_info.iteration
        """
        return self.instance.get_iteration()

    @property
    def device_id(self):
        """
        Function to receive TensorInfo device_id.

        Returns:
            device_id of TensorInfo instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_info = dbg_services.TensorInfo(node_name="conv2.bias",
            >>>                                       slot=0,
            >>>                                       iteration=8,
            >>>                                       device_id=5,
            >>>                                       root_graph_id=0,
            >>>                                       is_parameter=True)
            >>> device_id = tensor_info.device_id
        """
        # Bug fix: this property previously had no return statement and
        # always yielded None.
        return self.instance.get_device_id()

    @property
    def root_graph_id(self):
        """
        Function to receive TensorInfo root_graph_id.

        Returns:
            root_graph_id of TensorInfo instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_info = dbg_services.TensorInfo(node_name="conv2.bias",
            >>>                                       slot=0,
            >>>                                       iteration=8,
            >>>                                       device_id=5,
            >>>                                       root_graph_id=0,
            >>>                                       is_parameter=True)
            >>> root_graph_id = tensor_info.root_graph_id
        """
        return self.instance.get_root_graph_id()

    @property
    def is_parameter(self):
        """
        Function to receive TensorInfo is_parameter.

        Returns:
            is_parameter of TensorInfo instance (bool).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_info = dbg_services.TensorInfo(node_name="conv2.bias",
            >>>                                       slot=0,
            >>>                                       iteration=8,
            >>>                                       device_id=5,
            >>>                                       root_graph_id=0,
            >>>                                       is_parameter=True)
            >>> is_parameter = tensor_info.is_parameter
        """
        return self.instance.get_is_parameter()
class TensorData():
    """
    TensorData class.

    Args:
        data_ptr (byte): Data pointer.
        data_size (int): Size of data in bytes.
        dtype (int): An encoding representing the type of TensorData.
        shape (list): Shape of tensor.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> tensor_data = dbg_services.TensorData(data_ptr=b'\xba\xd0\xba\xd0',
        >>>                                       data_size=4,
        >>>                                       dtype=0,
        >>>                                       shape=[2, 2])
    """
    @check_tensor_data_init
    def __init__(self, data_ptr, data_size, dtype, shape):
        # Wrap the pybind-exported tensor_data struct; all properties below
        # simply delegate to this instance.
        self.instance = cds.tensor_data(data_ptr, data_size, dtype, shape)

    @property
    def data_ptr(self):
        """
        Function to receive TensorData data_ptr.

        Returns:
            data_ptr of TensorData instance (byte).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_data = dbg_services.TensorData(data_ptr=b'\xba\xd0\xba\xd0',
            >>>                                       data_size=4,
            >>>                                       dtype=0,
            >>>                                       shape=[2, 2])
            >>> data_ptr = tensor_data.data_ptr
        """
        return self.instance.get_data_ptr()

    @property
    def data_size(self):
        """
        Function to receive TensorData data_size.

        Returns:
            data_size of TensorData instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_data = dbg_services.TensorData(data_ptr=b'\xba\xd0\xba\xd0',
            >>>                                       data_size=4,
            >>>                                       dtype=0,
            >>>                                       shape=[2, 2])
            >>> data_size = tensor_data.data_size
        """
        return self.instance.get_data_size()

    @property
    def dtype(self):
        """
        Function to receive TensorData dtype.

        Returns:
            dtype of TensorData instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_data = dbg_services.TensorData(data_ptr=b'\xba\xd0\xba\xd0',
            >>>                                       data_size=4,
            >>>                                       dtype=0,
            >>>                                       shape=[2, 2])
            >>> dtype = tensor_data.dtype
        """
        return self.instance.get_dtype()

    @property
    def shape(self):
        """
        Function to receive TensorData shape.

        Returns:
            shape of TensorData instance (list).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> tensor_data = dbg_services.TensorData(data_ptr=b'\xba\xd0\xba\xd0',
            >>>                                       data_size=4,
            >>>                                       dtype=0,
            >>>                                       shape=[2, 2])
            >>> shape = tensor_data.shape
        """
        return self.instance.get_shape()
class WatchpointHit():
    """
    WatchpointHit class.

    Args:
        name (str): Name of WatchpointHit instance.
        slot (int): The numerical label of an output.
        condition (int): A representation of the condition to be checked.
        watchpoint_id (int): Watchpoint id.
        parameters (list): A list of all parameters for WatchpointHit instance.
            Parameters have to be instances of Parameter class.
        error_code (int): An explanation of certain scenarios where watchpoint could not be checked.
        device_id (int): Device id where the watchpoint is hit.
        root_graph_id (int): Root graph id where the watchpoint is hit.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> watchpoint_hit = dbg_services.WatchpointHit(name="hit1",
        >>>                                             slot=1,
        >>>                                             condition=2,
        >>>                                             watchpoint_id=3,
        >>>                                             parameters=[param1, param2],
        >>>                                             error_code=0,
        >>>                                             device_id=1,
        >>>                                             root_graph_id=1)
    """
    @check_watchpoint_hit_init
    def __init__(self, name, slot, condition, watchpoint_id, parameters, error_code, device_id, root_graph_id):
        # Unwrap the pybind instances carried by the Parameter wrappers before
        # constructing the underlying watchpoint_hit struct.
        unwrapped = [param.instance for param in parameters]
        self.instance = cds.watchpoint_hit(name, slot, condition, watchpoint_id,
                                           unwrapped, error_code, device_id, root_graph_id)

    @property
    def name(self):
        """
        Function to receive WatchpointHit name.

        Returns:
            name of WatchpointHit instance (str).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> name = watchpoint_hit.name
        """
        return self.instance.get_name()

    @property
    def slot(self):
        """
        Function to receive WatchpointHit slot.

        Returns:
            slot of WatchpointHit instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> slot = watchpoint_hit.slot
        """
        return self.instance.get_slot()

    @property
    def condition(self):
        """
        Function to receive WatchpointHit condition.

        Returns:
            condition of WatchpointHit instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> condition = watchpoint_hit.condition
        """
        return self.instance.get_condition()

    @property
    def watchpoint_id(self):
        """
        Function to receive WatchpointHit watchpoint_id.

        Returns:
            watchpoint_id of WatchpointHit instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> watchpoint_id = watchpoint_hit.watchpoint_id
        """
        return self.instance.get_watchpoint_id()

    @property
    def parameters(self):
        """
        Function to receive WatchpointHit parameters.

        Returns:
            List of parameters of WatchpointHit instance (list).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameters = watchpoint_hit.parameters
        """
        # Rewrap each pybind parameter object into a Python-level Parameter.
        return [Parameter(elem.get_name(),
                          elem.get_disabled(),
                          elem.get_value(),
                          elem.get_hit(),
                          elem.get_actual_value())
                for elem in self.instance.get_parameters()]

    @property
    def error_code(self):
        """
        Function to receive WatchpointHit error_code.

        Returns:
            error_code of WatchpointHit instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> error_code = watchpoint_hit.error_code
        """
        return self.instance.get_error_code()

    @property
    def device_id(self):
        """
        Function to receive WatchpointHit device_id.

        Returns:
            device_id of WatchpointHit instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> device_id = watchpoint_hit.device_id
        """
        return self.instance.get_device_id()

    @property
    def root_graph_id(self):
        """
        Function to receive WatchpointHit root_graph_id.

        Returns:
            root_graph_id of WatchpointHit instance (int).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> root_graph_id = watchpoint_hit.root_graph_id
        """
        return self.instance.get_root_graph_id()
class Parameter():
    """
    Parameter class.

    Args:
        name (str): Name of the parameter.
        disabled (bool): Whether parameter is used in backend.
        value (float): Threshold value of the parameter.
        hit (bool): Whether this parameter triggered watchpoint (default is False).
        actual_value (float): Actual value of the parameter (default is 0.0).

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> parameter = dbg_services.Parameter(name="param",
        >>>                                    disabled=False,
        >>>                                    value=0.0,
        >>>                                    hit=False,
        >>>                                    actual_value=0.0)
    """
    @check_parameter_init
    def __init__(self, name, disabled, value, hit=False, actual_value=0.0):
        # Wrap the pybind-exported parameter struct; `instance` is what is
        # handed back to the C++ layer by add_watchpoint().
        self.instance = cds.parameter(name, disabled, value, hit, actual_value)

    @property
    def name(self):
        """
        Function to receive Parameter name.

        Returns:
            name of Parameter instance (str).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> name = parameter.name
        """
        return self.instance.get_name()

    @property
    def disabled(self):
        """
        Function to receive Parameter disabled value.

        Returns:
            disabled of Parameter instance (bool).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> disabled = parameter.disabled
        """
        return self.instance.get_disabled()

    @property
    def value(self):
        """
        Function to receive Parameter value.

        Returns:
            value of Parameter instance (float).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> value = parameter.value
        """
        return self.instance.get_value()

    @property
    def hit(self):
        """
        Function to receive Parameter hit value.

        Returns:
            hit of Parameter instance (bool).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> hit = parameter.hit
        """
        return self.instance.get_hit()

    @property
    def actual_value(self):
        """
        Function to receive Parameter actual_value value.

        Returns:
            actual_value of Parameter instance (float).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> actual_value = parameter.actual_value
        """
        return self.instance.get_actual_value()
| @@ -0,0 +1,24 @@ | |||
#!/bin/bash
# Run each offline-debugger test script, diff its output against the recorded
# .expected baseline, and report PASSED/FAILED per test.

run_test() {
  # $1: test base name; runs $1.py and compares $1.actual to $1.expected.
  local name=$1
  python "${name}.py" > "${name}.actual"
  if diff "${name}.actual" "${name}.expected"; then
    echo "${name} PASSED"
  else
    echo "${name} FAILED"
  fi
}

run_test sync_trans_false_read_tensors
run_test sync_trans_true_read_tensors
run_test sync_trans_false_watchpoints
| @@ -0,0 +1,70 @@ | |||
| ----------------------------------------------------------- | |||
| tensor_info_1 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = True | |||
| tensor_data_1 attributes: | |||
| data (printed in uint8) = [ 0 0 0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0 | |||
| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 58 196 248 | |||
| 194 127 0 0 17 0 0 0 0 0 0 0 160 76 6 140 195 127 | |||
| 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0 | |||
| 64 195 195 248 194 127 0 0 0 0 0 0 0 0 0 0 0 0 | |||
| 0 0 0 0 0 0 88 1 196 248 194 127 0 0 18 0 0 0 | |||
| 0 0 0 0 160 47 6 140 195 127 0 0 69 0 0 0 0 0 | |||
| 0 0 1 0 0 0 195 127 0 0 176 203 195 248 194 127 0 0 | |||
| 176 204 195 248 194 127 0 0 0 0 0 0 0 0 0 0 216 241 | |||
| 195 248 194 127 0 0 19 0 0 0 0 0 0 0 96 39 6 140 | |||
| 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 | |||
| 0 0 112 52 196 248 194 127 0 0 176 52 196 248 194 127 0 0 | |||
| 0 0 0 0 0 0 0 0 88 250 195 248 194 127 0 0 20 0 | |||
| 0 0 0 0 0 0 128 130 5 140 195 127 0 0 69 0 0 0 | |||
| 0 0 0 0 0 0 0 0 195 127 0 0 208 136 195 248 194 127 | |||
| 0 0 176 202 195 248 194 127 0 0 48 52 196 248 194 127 0 0 | |||
| 184 247 195 248 194 127 0 0 21 0 0 0 0 0 0 0 176 213 | |||
| 4 140 195 127 0 0 69 0 0 0 0 0 0 0 0 0 0 0 | |||
| 195 127 0 0 48 52 196 248 194 127 0 0 0 0 0 0 0 0 | |||
| 0 0 0 0 0 0 0 0 0 0 8 249 195 248 194 127 0 0 | |||
| 22 0 0 0 0 0 0 0 16 46 4 140 195 127 0 0 69 0 | |||
| 0 0 0 0 0 0 1 0 0 0 195 127 0 0 64 137 195 248 | |||
| 194 127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |||
| 0 0 88 12 196 248 194 127 0 0 23 0 0 0 0 0 0 0 | |||
| 32 137 3 140 195 127 0 0 85 0 0 0 0 0 0 0 0 0 | |||
| 0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0 0 0 | |||
| 0 0 0 0 0 0 0 0 0 0 0 0 104 246 195 248 194 127 | |||
| 0 0 24 0 0 0 0 0 0 0 48 104 15 140 195 127 0 0 | |||
| 32 104 15 140 195 127 0 0] | |||
| size in bytes = 512 | |||
| debugger dtype = 11 | |||
| shape = [128] | |||
| ----------------------------------------------------------- | |||
| tensor_info_2 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308 | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_2 attributes: | |||
| data (printed in uint8) = [ 0 169 0 ... 152 242 63] | |||
| size in bytes = 4153344 | |||
| debugger dtype = 11 | |||
| shape = [32, 192, 13, 13] | |||
| ----------------------------------------------------------- | |||
| tensor_info_3 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300 | |||
| slot = 1 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_3 attributes: | |||
| data (printed in uint8) = [ 0 169 0 ... 217 4 52] | |||
| size in bytes = 831744 | |||
| debugger dtype = 8 | |||
| shape = [207936] | |||
| @@ -0,0 +1,74 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Read tensor test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
| import numpy as np | |||
def main():
    """Exercise the offline debugger read-tensor API against a sync dump."""
    backend = d.DbgServices(
        dump_file_path="/home/jtzanaka/dumps/sync_trans_false/032421/alexnet")
    _ = backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    # One parameter tensor, one slot-0 op output, one non-zero-slot op output.
    infos = [
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                     slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True),
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308",
                     slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300",
                     slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
    ]
    print_read_tensors(infos, backend.read_tensors(infos))
def print_read_tensors(tensor_info, tensor_data):
    """Print each (info, data) pair to stdout in the expected-output format."""
    for idx, (info, data) in enumerate(zip(tensor_info, tensor_data), start=1):
        print("-----------------------------------------------------------")
        print("tensor_info_" + str(idx) + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()
        print("tensor_data_" + str(idx) + " attributes:")
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, data.data_size))
        # Flag any disagreement between the python-visible buffer length and
        # the byte count reported by the C++ backend.
        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)


if __name__ == "__main__":
    main()
| @@ -0,0 +1,33 @@ | |||
| ----------------------------------------------------------- | |||
| watchpoint_hit for test_1 attributes: | |||
| name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308 | |||
| slot = 0 | |||
| condition = 6 | |||
| watchpoint_id = 1 | |||
| parameter 0 name = param | |||
| parameter 0 disabled = False | |||
| parameter 0 value = 0.0 | |||
| parameter 0 hit = True | |||
| parameter 0 actual_value = -2.429065704345703 | |||
| error code = 0 | |||
| device_id = 0 | |||
| root_graph_id = 0 | |||
| ----------------------------------------------------------- | |||
| watchpoint_hit for test_4 attributes: | |||
| name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias | |||
| slot = 0 | |||
| condition = 18 | |||
| watchpoint_id = 3 | |||
| parameter 0 name = abs_mean_update_ratio_gt | |||
| parameter 0 disabled = False | |||
| parameter 0 value = 0.0 | |||
| parameter 0 hit = True | |||
| parameter 0 actual_value = 1.793662034335766e-35 | |||
| parameter 1 name = epsilon | |||
| parameter 1 disabled = True | |||
| parameter 1 value = 0.0 | |||
| parameter 1 hit = False | |||
| parameter 1 actual_value = 0.0 | |||
| error code = 0 | |||
| device_id = 0 | |||
| root_graph_id = 0 | |||
| @@ -0,0 +1,109 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Watchpoints test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
def main():
    """Run four watchpoint scenarios against a synchronous dump."""
    backend = d.DbgServices(
        dump_file_path="/home/jtzanaka/dumps/sync_trans_false/032421/alexnet")
    _ = backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    conv_node = ("Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                 "Conv2D-op308")
    bias_node = ("Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                 "Parameter[6]_11/fc3.bias")

    # NOTES:
    # -> watch_condition=6 is MIN_LT
    # -> watch_condition=18 is CHANGE_TOO_LARGE

    # test 1: watchpoint set and hit (watch_condition=6)
    _ = backend.add_watchpoint(
        watchpoint_id=1, watch_condition=6,
        check_node_list={conv_node: {"device_id": [0], "root_graph_id": [0],
                                     "is_parameter": False}},
        parameter_list=[d.Parameter(name="param", disabled=False, value=0.0)])
    hits_1 = backend.check_watchpoints(iteration=2)
    if len(hits_1) != 1:
        print("ERROR -> test 1: watchpoint set but not hit just once")
    print_watchpoint_hits(hits_1, 1)

    # test 2: watchpoint remove and ensure it's not hit
    _ = backend.remove_watchpoint(watchpoint_id=1)
    if backend.check_watchpoints(iteration=2):
        print("ERROR -> test 2: watchpoint removed but hit")

    # test 3: watchpoint set and not hit, then remove
    _ = backend.add_watchpoint(
        watchpoint_id=2, watch_condition=6,
        check_node_list={conv_node: {"device_id": [0], "root_graph_id": [0],
                                     "is_parameter": False}},
        parameter_list=[d.Parameter(name="param", disabled=False, value=-1000.0)])
    if backend.check_watchpoints(iteration=2):
        print("ERROR -> test 3: watchpoint set but not supposed to be hit")
    _ = backend.remove_watchpoint(watchpoint_id=2)

    # test 4: weight change watchpoint set and hit
    ratio = d.Parameter(
        name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
    epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
    _ = backend.add_watchpoint(
        watchpoint_id=3, watch_condition=18,
        check_node_list={bias_node: {"device_id": [0], "root_graph_id": [0],
                                     "is_parameter": True}},
        parameter_list=[ratio, epsilon])
    hits_4 = backend.check_watchpoints(iteration=3)
    if len(hits_4) != 1:
        print("ERROR -> test 4: watchpoint weight change set but not hit just once")
    print_watchpoint_hits(hits_4, 4)
def print_watchpoint_hits(watchpoint_hits, test_id):
    """Print each watchpoint hit to stdout in the expected-output format."""
    for hit in watchpoint_hits:
        print("-----------------------------------------------------------")
        print("watchpoint_hit for test_%u attributes:" % test_id)
        print("name = ", hit.name)
        print("slot = ", hit.slot)
        print("condition = ", hit.condition)
        print("watchpoint_id = ", hit.watchpoint_id)
        for index, param in enumerate(hit.parameters):
            print("parameter ", index, " name = ", param.name)
            print("parameter ", index, " disabled = ", param.disabled)
            print("parameter ", index, " value = ", param.value)
            print("parameter ", index, " hit = ", param.hit)
            print("parameter ", index, " actual_value = ", param.actual_value)
        print("error code = ", hit.error_code)
        print("device_id = ", hit.device_id)
        print("root_graph_id = ", hit.root_graph_id)


if __name__ == "__main__":
    main()
| @@ -0,0 +1,70 @@ | |||
| ----------------------------------------------------------- | |||
| tensor_info_1 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = True | |||
| tensor_data_1 attributes: | |||
| data (printed in uint8) = [ 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0 0 0 | |||
| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 186 117 65 | |||
| 195 127 0 0 5 0 0 0 0 0 0 0 160 76 6 204 195 127 | |||
| 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0 | |||
| 48 135 117 65 195 127 0 0 16 58 118 65 195 127 0 0 144 58 | |||
| 118 65 195 127 0 0 168 186 117 65 195 127 0 0 6 0 0 0 | |||
| 0 0 0 0 160 47 6 204 195 127 0 0 69 0 0 0 0 0 | |||
| 0 0 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0 | |||
| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 184 249 | |||
| 117 65 195 127 0 0 7 0 0 0 0 0 0 0 96 39 6 204 | |||
| 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 | |||
| 0 0 224 218 117 65 195 127 0 0 0 0 0 0 0 0 0 0 | |||
| 224 219 117 65 195 127 0 0 200 17 118 65 195 127 0 0 8 0 | |||
| 0 0 0 0 0 0 128 130 5 204 195 127 0 0 69 0 0 0 | |||
| 0 0 0 0 1 0 0 0 195 127 0 0 120 233 255 59 196 127 | |||
| 0 0 224 217 117 65 195 127 0 0 224 214 117 65 195 127 0 0 | |||
| 120 250 117 65 195 127 0 0 9 0 0 0 0 0 0 0 176 213 | |||
| 4 204 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 | |||
| 195 127 0 0 240 66 118 65 195 127 0 0 160 218 117 65 195 127 | |||
| 0 0 224 215 117 65 195 127 0 0 40 9 118 65 195 127 0 0 | |||
| 10 0 0 0 0 0 0 0 16 46 4 204 195 127 0 0 69 0 | |||
| 0 0 0 0 0 0 1 0 0 0 195 127 0 0 208 59 118 65 | |||
| 195 127 0 0 0 0 0 0 0 0 0 0 96 218 117 65 195 127 | |||
| 0 0 56 251 117 65 195 127 0 0 11 0 0 0 0 0 0 0 | |||
| 32 137 3 204 195 127 0 0 85 0 0 0 0 0 0 0 1 0 | |||
| 0 0 195 127 0 0 224 214 117 65 195 127 0 0 144 59 118 65 | |||
| 195 127 0 0 160 214 117 65 195 127 0 0 136 62 118 65 195 127 | |||
| 0 0 12 0 0 0 0 0 0 0 48 104 15 204 195 127 0 0 | |||
| 32 104 15 204 195 127 0 0] | |||
| size in bytes = 512 | |||
| debugger dtype = 11 | |||
| shape = [128] | |||
| ----------------------------------------------------------- | |||
| tensor_info_2 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308 | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_2 attributes: | |||
| data (printed in uint8) = [206 239 74 ... 53 201 62] | |||
| size in bytes = 4153344 | |||
| debugger dtype = 11 | |||
| shape = [32, 192, 13, 13] | |||
| ----------------------------------------------------------- | |||
| tensor_info_3 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300 | |||
| slot = 1 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_3 attributes: | |||
| data (printed in uint8) = [206 239 74 ... 16 239 51] | |||
| size in bytes = 831744 | |||
| debugger dtype = 8 | |||
| shape = [207936] | |||
| @@ -0,0 +1,74 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Read tensor test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
| import numpy as np | |||
def main():
    """Exercise the offline debugger read-tensor API against a sync-trans dump."""
    backend = d.DbgServices(
        dump_file_path="/home/jtzanaka/dumps/sync_trans_true/032421/alexnet")
    _ = backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    # One parameter tensor, one slot-0 op output, one non-zero-slot op output.
    infos = [
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                     slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True),
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308",
                     slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300",
                     slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
    ]
    print_read_tensors(infos, backend.read_tensors(infos))
def print_read_tensors(tensor_info, tensor_data):
    """Print each (info, data) pair to stdout in the expected-output format."""
    for idx, (info, data) in enumerate(zip(tensor_info, tensor_data), start=1):
        print("-----------------------------------------------------------")
        print("tensor_info_" + str(idx) + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()
        print("tensor_data_" + str(idx) + " attributes:")
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, data.data_size))
        # Flag any disagreement between the python-visible buffer length and
        # the byte count reported by the C++ backend.
        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)


if __name__ == "__main__":
    main()
| @@ -0,0 +1,66 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "pybind11/pybind11.h" | |||
| #include "pybind11/stl.h" | |||
| #include "pybind11/stl_bind.h" | |||
| #include "debugger/offline_debug/dbg_services.h" | |||
| PYBIND11_MODULE(_mindspore_offline_debug, m) { | |||
| m.doc() = "pybind11 debug services api"; | |||
| py::class_<DbgServices>(m, "DbgServices") | |||
| .def(py::init<bool>()) | |||
| .def("Initialize", &DbgServices::Initialize) | |||
| .def("AddWatchpoint", &DbgServices::AddWatchpoint) | |||
| .def("RemoveWatchpoint", &DbgServices::RemoveWatchpoint) | |||
| .def("CheckWatchpoints", &DbgServices::CheckWatchpoints) | |||
| .def("ReadTensors", &DbgServices::ReadTensors) | |||
| .def("GetVersion", &DbgServices::GetVersion); | |||
| py::class_<parameter>(m, "parameter") | |||
| .def(py::init<std::string, bool, double, bool, double>()) | |||
| .def("get_name", ¶meter::get_name) | |||
| .def("get_disabled", ¶meter::get_disabled) | |||
| .def("get_value", ¶meter::get_value) | |||
| .def("get_hit", ¶meter::get_hit) | |||
| .def("get_actual_value", ¶meter::get_actual_value); | |||
| py::class_<watchpoint_hit>(m, "watchpoint_hit") | |||
| .def(py::init<std::string, uint32_t, int, uint32_t, std::vector<parameter_t>, int32_t, uint32_t, uint32_t>()) | |||
| .def("get_name", &watchpoint_hit::get_name) | |||
| .def("get_slot", &watchpoint_hit::get_slot) | |||
| .def("get_condition", &watchpoint_hit::get_condition) | |||
| .def("get_watchpoint_id", &watchpoint_hit::get_watchpoint_id) | |||
| .def("get_parameters", &watchpoint_hit::get_parameters) | |||
| .def("get_error_code", &watchpoint_hit::get_error_code) | |||
| .def("get_device_id", &watchpoint_hit::get_device_id) | |||
| .def("get_root_graph_id", &watchpoint_hit::get_root_graph_id); | |||
| py::class_<tensor_info>(m, "tensor_info") | |||
| .def(py::init<std::string, uint32_t, uint32_t, uint32_t, uint32_t, bool>()) | |||
| .def("get_node_name", &tensor_info::get_node_name) | |||
| .def("get_slot", &tensor_info::get_slot) | |||
| .def("get_iteration", &tensor_info::get_iteration) | |||
| .def("get_device_id", &tensor_info::get_device_id) | |||
| .def("get_root_graph_id", &tensor_info::get_root_graph_id) | |||
| .def("get_is_parameter", &tensor_info::get_is_parameter); | |||
| py::class_<tensor_data>(m, "tensor_data") | |||
| .def(py::init<char *, uint64_t, int, std::vector<int64_t>>()) | |||
| .def("get_data_ptr", &tensor_data::get_data_ptr) | |||
| .def("get_data_size", &tensor_data::get_data_size) | |||
| .def("get_dtype", &tensor_data::get_dtype) | |||
| .def("get_shape", &tensor_data::get_shape); | |||
| } | |||
| @@ -0,0 +1,123 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| General Validator Helper Functions. | |||
| """ | |||
| import os | |||
| import inspect | |||
# Valid ranges for unsigned integer arguments checked below.
UINT32_MIN = 0
UINT32_MAX = 4294967295  # 2**32 - 1
UINT64_MIN = 0
UINT64_MAX = 18446744073709551615  # 2**64 - 1
def pad_arg_name(arg_name):
    """Append a separating space to a non-empty argument name."""
    return arg_name if arg_name == "" else arg_name + " "


def check_value(arg, valid_range, arg_name=""):
    """Raise ValueError when arg lies outside [valid_range[0], valid_range[1]]."""
    arg_name = pad_arg_name(arg_name)
    if arg < valid_range[0] or arg > valid_range[1]:
        raise ValueError(
            "Input {0}is not within the required interval of ({1} to {2}).".format(arg_name,
                                                                                   valid_range[0], valid_range[1]))
def check_uint32(arg, arg_name=""):
    """Validate that arg is an int within the uint32 range.

    Fix: forward arg_name to check_value so that out-of-range errors name
    the offending argument (previously only the type error did).
    """
    type_check(arg, (int,), arg_name)
    check_value(arg, [UINT32_MIN, UINT32_MAX], arg_name)
def check_uint64(arg, arg_name=""):
    """Validate that arg is an int within the uint64 range.

    Fix: forward arg_name to check_value so that out-of-range errors name
    the offending argument (previously only the type error did).
    """
    type_check(arg, (int,), arg_name)
    check_value(arg, [UINT64_MIN, UINT64_MAX], arg_name)
def check_dir(dataset_dir):
    """Raise ValueError unless dataset_dir is an existing, readable directory."""
    readable = os.path.isdir(dataset_dir) and os.access(dataset_dir, os.R_OK)
    if not readable:
        raise ValueError("The folder {} does not exist or permission denied!".format(dataset_dir))
def parse_user_args(method, *args, **kwargs):
    """
    Bind user args/kwargs against a callable's signature.

    Args:
        method (method): a callable function.
        args: user passed args.
        kwargs: user passed kwargs.

    Returns:
        list: per-parameter values with defaults applied.
        OrderedDict: the full bound parameter -> argument mapping.
    """
    sig = inspect.signature(method)
    names = list(sig.parameters)
    if 'self' in sig.parameters or 'cls' in sig.parameters:
        # The callable itself stands in for the self/cls slot when binding.
        bound = sig.bind(method, *args, **kwargs)
        names = names[1:]
    else:
        bound = sig.bind(*args, **kwargs)
    bound.apply_defaults()
    return [bound.arguments.get(name) for name in names], bound.arguments
def type_check(arg, types, arg_name):
    """
    Check the type of the parameter.

    Args:
        arg (Any) : any variable.
        types (tuple): tuple of all valid types for arg.
        arg_name (str): the name of arg.

    Returns:
        Exception: when the type is not correct, otherwise nothing.
    """
    print_value = '\"\"' if repr(arg) == repr('') else arg
    # bool is a subclass of int: reject it when int is accepted but bool is not.
    bool_masquerading = int in types and bool not in types and isinstance(arg, bool)
    if bool_masquerading or not isinstance(arg, types):
        raise TypeError("Argument {0} with value {1} is not of type {2}.".format(arg_name, print_value, types))
def type_check_list(args, types, arg_names):
    """
    Check the type of each parameter in the list.

    Args:
        args (Union[list, tuple]): a list or tuple of any variable.
        types (tuple): tuple of all valid types for arg.
        arg_names (Union[list, tuple of str]): the names of args; a single
            string generates element names as name[i].

    Returns:
        Exception: when the type is not correct, otherwise nothing.
    """
    type_check(args, (list, tuple,), arg_names)
    if len(args) != len(arg_names) and not isinstance(arg_names, str):
        raise ValueError("List of arguments is not the same length as argument_names.")
    if isinstance(arg_names, str):
        arg_names = ["{0}[{1}]".format(arg_names, i) for i in range(len(args))]
    for element, element_name in zip(args, arg_names):
        type_check(element, types, element_name)
| @@ -0,0 +1,223 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Validator Functions for Offline Debugger APIs. | |||
| """ | |||
| from functools import wraps | |||
| import dbg_services as cds | |||
| from mi_validator_helpers import parse_user_args, type_check, type_check_list, check_dir, check_uint32, check_uint64 | |||
def check_init(method):
    """Wrapper method to check the parameters of DbgServices init."""
    @wraps(method)
    def checked(self, *args, **kwargs):
        [dump_file_path, verbose], _ = parse_user_args(method, *args, **kwargs)
        type_check(dump_file_path, (str,), "dump_file_path")
        type_check(verbose, (bool,), "verbose")
        # The dump directory must already exist and be readable.
        check_dir(dump_file_path)
        return method(self, *args, **kwargs)
    return checked
def check_initialize(method):
    """Wrapper method to check the parameters of DbgServices Initialize method."""
    @wraps(method)
    def checked(self, *args, **kwargs):
        [net_name, is_sync_mode], _ = parse_user_args(method, *args, **kwargs)
        type_check(net_name, (str,), "net_name")
        type_check(is_sync_mode, (bool,), "is_sync_mode")
        return method(self, *args, **kwargs)
    return checked
def check_add_watchpoint(method):
    """Wrapper method to check the parameters of DbgServices AddWatchpoint."""
    @wraps(method)
    def checked(self, *args, **kwargs):
        [id_value, watch_condition, check_node_list, parameter_list], _ = parse_user_args(method, *args, **kwargs)
        check_uint32(id_value, "id")
        check_uint32(watch_condition, "watch_condition")
        type_check(check_node_list, (dict,), "check_node_list")
        # Each entry maps a node name to its per-node constraint dict.
        for node_name, node_info in check_node_list.items():
            type_check(node_name, (str,), "node_name")
            type_check(node_info, (dict,), "node_info")
            for info_name, info_param in node_info.items():
                type_check(info_name, (str,), "node parameter name")
                if info_name == "device_id":
                    for device in info_param:
                        check_uint32(device, "device_id")
                elif info_name == "root_graph_id":
                    for graph in info_param:
                        check_uint32(graph, "root_graph_id")
                elif info_name == "is_parameter":
                    type_check(info_param, (bool,), "is_parameter")
                else:
                    raise ValueError("Node parameter {} is not defined.".format(info_name))
        labels = ["param_{0}".format(i) for i in range(len(parameter_list))]
        type_check_list(parameter_list, (cds.Parameter,), labels)
        return method(self, *args, **kwargs)
    return checked
def check_remove_watchpoint(method):
    """Wrapper method to check the parameters of DbgServices RemoveWatchpoint."""
    @wraps(method)
    def checked(self, *args, **kwargs):
        [id_value], _ = parse_user_args(method, *args, **kwargs)
        check_uint32(id_value, "id")
        return method(self, *args, **kwargs)
    return checked
def check_check_watchpoints(method):
    """Wrapper method to check the parameters of DbgServices CheckWatchpoint."""
    @wraps(method)
    def checked(self, *args, **kwargs):
        [iteration], _ = parse_user_args(method, *args, **kwargs)
        check_uint32(iteration, "iteration")
        return method(self, *args, **kwargs)
    return checked
def check_read_tensors(method):
    """Wrapper method to check the parameters of DbgServices ReadTensors."""
    @wraps(method)
    def checked(self, *args, **kwargs):
        [info_list], _ = parse_user_args(method, *args, **kwargs)
        labels = ["info_{0}".format(i) for i in range(len(info_list))]
        type_check_list(info_list, (cds.TensorInfo,), labels)
        return method(self, *args, **kwargs)
    return checked
def check_initialize_done(method):
    """Wrapper method to check if initialize is done for DbgServices.

    Fix: corrected typos in the docstring ("initlize") and in the
    user-facing error message ("Inilize" -> "Initialize").
    """
    @wraps(method)
    def new_method(self, *args, **kwargs):
        if not self.initialized:
            raise RuntimeError("Initialize should be called before any other methods of DbgServices!")
        return method(self, *args, **kwargs)
    return new_method
def check_tensor_info_init(method):
    """Wrapper method to check the parameters of DbgServices TensorInfo init."""
    @wraps(method)
    def checked(self, *args, **kwargs):
        [node_name, slot, iteration, device_id, root_graph_id,
         is_parameter], _ = parse_user_args(method, *args, **kwargs)
        type_check(node_name, (str,), "node_name")
        # All numeric identifiers are uint32 values.
        for value, label in ((slot, "slot"), (iteration, "iteration"),
                             (device_id, "device_id"), (root_graph_id, "root_graph_id")):
            check_uint32(value, label)
        type_check(is_parameter, (bool,), "is_parameter")
        return method(self, *args, **kwargs)
    return checked
def check_tensor_data_init(method):
    """Wrapper method to check the parameters of DbgServices TensorData init."""
    @wraps(method)
    def checked(self, *args, **kwargs):
        [data_ptr, data_size, dtype, shape], _ = parse_user_args(method, *args, **kwargs)
        type_check(data_ptr, (bytes,), "data_ptr")
        check_uint64(data_size, "data_size")
        type_check(dtype, (int,), "dtype")
        labels = ["shape_{0}".format(i) for i in range(len(shape))]
        type_check_list(shape, (int,), labels)
        # The raw buffer must agree with the advertised byte count.
        if len(data_ptr) != data_size:
            raise ValueError("data_ptr length ({0}) is not equal to data_size ({1}).".format(len(data_ptr), data_size))
        return method(self, *args, **kwargs)
    return checked
def check_watchpoint_hit_init(method):
    """Validate the arguments passed to DbgServices WatchpointHit __init__."""
    @wraps(method)
    def new_method(self, *args, **kwargs):
        [name, slot, condition, watchpoint_id,
         parameters, error_code, device_id, root_graph_id], _ = parse_user_args(method, *args, **kwargs)
        type_check(name, (str,), "name")
        check_uint32(slot, "slot")
        type_check(condition, (int,), "condition")
        check_uint32(watchpoint_id, "watchpoint_id")
        # Every element of parameters must be a bound Parameter instance;
        # label them by position for precise error messages.
        param_labels = ["param_{0}".format(index) for index in range(len(parameters))]
        type_check_list(parameters, (cds.Parameter,), param_labels)
        type_check(error_code, (int,), "error_code")
        check_uint32(device_id, "device_id")
        check_uint32(root_graph_id, "root_graph_id")
        return method(self, *args, **kwargs)
    return new_method
def check_parameter_init(method):
    """Validate the arguments passed to DbgServices Parameter __init__."""
    @wraps(method)
    def new_method(self, *args, **kwargs):
        [name, disabled, value, hit, actual_value], _ = parse_user_args(method, *args, **kwargs)
        # Table-drive the per-argument type checks; order matches the original
        # call sequence so the first failing argument is reported identically.
        for arg_value, valid_types, arg_name in ((name, (str,), "name"),
                                                 (disabled, (bool,), "disabled"),
                                                 (value, (float,), "value"),
                                                 (hit, (bool,), "hit"),
                                                 (actual_value, (float,), "actual_value")):
            type_check(arg_value, valid_types, arg_name)
        return method(self, *args, **kwargs)
    return new_method
| @@ -0,0 +1,19 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "debugger/offline_debug/offline_logger.h" | |||
| bool DbgLogger::verbose = false; | |||
| @@ -0,0 +1,59 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#ifndef OFFLINE_LOGGER_H_
#define OFFLINE_LOGGER_H_

#include <cstdio>
#include <cstdlib>
#include <iostream>

// Minimal logging shim for the offline debugger: maps the MS_LOG macros used
// by the shared debugger sources onto a stand-alone console logger, so the
// offline build does not depend on mindspore's full log_adapter.
#define MS_LOG(level) MS_LOG_##level

// All levels share one sink; messages are emitted only when
// DbgLogger::verbose has been enabled.
#define MS_LOG_INFO static_cast<void>(0), !(DbgLogger::verbose) ? void(0) : DbgLogger(DbgLoggerLvl::INFO) < std::cout
#define MS_LOG_ERROR MS_LOG_INFO
#define MS_LOG_DEBUG MS_LOG_INFO
#define MS_LOG_WARNING MS_LOG_INFO
// NOTE(review): when verbose is false, MS_LOG(EXCEPTION) is a no-op here,
// whereas the online MS_LOG(EXCEPTION) always throws — confirm callers do not
// rely on it aborting.
#define MS_LOG_EXCEPTION \
  static_cast<void>(0), !(DbgLogger::verbose) ? void(0) : DbgLogger(DbgLoggerLvl::EXCEPTION) < std::cout

enum DbgLoggerLvl : int { DEBUG = 0, INFO, WARNING, ERROR, EXCEPTION };

class DbgLogger {
 public:
  explicit DbgLogger(DbgLoggerLvl lvl) : lvl_(lvl) {}
  ~DbgLogger() = default;
  // Terminates a log statement: optionally redirects stdout to the file named
  // by the OFFLINE_DBG_LOG environment variable, then ends the line.
  void operator<(std::ostream &os) const {
    char *dbg_log_path = std::getenv("OFFLINE_DBG_LOG");
    if (dbg_log_path != nullptr) {
      // NOTE(review): freopen permanently redirects the process-wide stdout on
      // every log call while OFFLINE_DBG_LOG is set — confirm intended.
      FILE *fp = std::freopen(dbg_log_path, "a", stdout);
      if (fp == nullptr) {
        std::cout << "ERROR: DbgLogger could not redirect all stdout to a file";
      }
    }
    os << std::endl;
    if (lvl_ == DbgLoggerLvl::EXCEPTION) {
      // NOTE(review): a bare `throw;` with no exception in flight calls
      // std::terminate — confirm an active exception is always present here.
      throw;
    }
  }
  static bool verbose;  // toggled from Python via DbgServices(verbose=...)

 private:
  DbgLoggerLvl lvl_;
};
#endif  // OFFLINE_LOGGER_H_
| @@ -22,7 +22,16 @@ | |||
| #include <tuple> | |||
| #include "debug/debugger/tensor_summary.h" | |||
| #ifdef OFFLINE_DBG_MODE | |||
| #include "Eigen/Core" | |||
| #include "Eigen/src/Core/arch/CUDA/Half.h" | |||
| using float16 = Eigen::half; | |||
| #include "offline_debug/offline_logger.h" | |||
| #endif | |||
| #ifdef ONLINE_DBG_MODE | |||
| namespace mindspore { | |||
| #endif | |||
| using CONDITION_TYPE = DebugServices::CONDITION_TYPE; | |||
| RangeCountCalculator::RangeCountCalculator() | |||
| @@ -281,4 +290,6 @@ template class TensorSummary<float16>; | |||
| template class TensorSummary<float>; | |||
| template class TensorSummary<double>; | |||
| template class TensorSummary<bool>; | |||
| #ifdef ONLINE_DBG_MODE | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -24,7 +24,9 @@ | |||
| #include "debug/debug_services.h" | |||
| #ifdef ONLINE_DBG_MODE | |||
| namespace mindspore { | |||
| #endif | |||
| class RangeCountCalculator { | |||
| public: | |||
| RangeCountCalculator(); | |||
| @@ -121,5 +123,7 @@ class TensorSummary : public ITensorSummary { | |||
| double_t GetZeroValPercent(); | |||
| void InitCalculators(const std::vector<DebugServices::watchpoint_t> &); | |||
| }; | |||
| #ifdef ONLINE_DBG_MODE | |||
| } // namespace mindspore | |||
| #endif | |||
| #endif // MINDSPORE_TENSOR_SUMMARY_H | |||
| @@ -16,37 +16,170 @@ | |||
| #ifndef MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_ | |||
| #define MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_ | |||
| #include <algorithm> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <cstring> | |||
| #include <iostream> | |||
| #ifdef OFFLINE_DBG_MODE | |||
| #include "debugger/offline_debug/offline_logger.h" | |||
| #else | |||
| #include "ir/tensor.h" | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #endif | |||
| #ifdef ONLINE_DBG_MODE | |||
| namespace mindspore { | |||
| class TensorData { | |||
| private: | |||
| mindspore::tensor::TensorPtr tensor_ptr; | |||
| std::string name; | |||
| size_t slot; | |||
| int execution_order; | |||
| #endif | |||
// Local mirror of the type-id numbering used by dump files. NOTE(review):
// this appears to duplicate the TypeId enum from mindspore core so the
// offline build need not include it — values must stay in sync; confirm.
namespace MsTypeId {
typedef enum MsTypeId : unsigned int {
  kTypeUnknown = 0,
  kMetaTypeBegin = kTypeUnknown,
  kMetaTypeType,  // Type
  kMetaTypeAnything,
  kMetaTypeObject,
  kMetaTypeTypeType,  // TypeType
  kMetaTypeProblem,
  kMetaTypeExternal,
  kMetaTypeNone,
  kMetaTypeNull,
  kMetaTypeEllipsis,
  kMetaTypeEnd,
  //
  // Object types
  //
  kObjectTypeBegin = kMetaTypeEnd,
  kObjectTypeNumber,
  kObjectTypeString,
  kObjectTypeList,
  kObjectTypeTuple,
  kObjectTypeSlice,
  kObjectTypeKeyword,
  kObjectTypeTensorType,
  kObjectTypeRowTensorType,
  kObjectTypeSparseTensorType,
  kObjectTypeUndeterminedType,
  kObjectTypeClass,
  kObjectTypeDictionary,
  kObjectTypeFunction,
  kObjectTypeJTagged,
  kObjectTypeSymbolicKeyType,
  kObjectTypeEnvType,
  kObjectTypeRefKey,
  kObjectTypeRef,
  kObjectTypeEnd,
  //
  // Number Types
  //
  kNumberTypeBegin = kObjectTypeEnd,
  kNumberTypeBool,
  kNumberTypeInt,
  kNumberTypeInt8,
  kNumberTypeInt16,
  kNumberTypeInt32,
  kNumberTypeInt64,
  kNumberTypeUInt,
  kNumberTypeUInt8,
  kNumberTypeUInt16,
  kNumberTypeUInt32,
  kNumberTypeUInt64,
  kNumberTypeFloat,
  kNumberTypeFloat16,
  kNumberTypeFloat32,
  kNumberTypeFloat64,
  kNumberTypeComplex64,
  kNumberTypeEnd
} MsTypeId;
}  // namespace MsTypeId
// The debugger's own data-type enum, shared by the online and offline builds;
// TensorData::ConvertMsToDbgType / ConvertStringToDbgType translate into it.
typedef enum DbgDataType : unsigned int {
  DT_UNDEFINED = 0,
  // Basic types.
  DT_BOOL = 1,      // bool
  DT_INT8 = 2,      // int8_t
  DT_INT16 = 3,     // int16_t
  DT_INT32 = 4,     // int32_t
  DT_INT64 = 5,     // int64_t
  DT_UINT8 = 6,     // uint8_t
  DT_UINT16 = 7,    // uint16_t
  DT_UINT32 = 8,    // uint32_t
  DT_UINT64 = 9,    // uint64_t
  DT_FLOAT16 = 10,  // float 16
  DT_FLOAT32 = 11,  // float 32
  DT_FLOAT64 = 12,  // float 64
  DT_STRING = 13,   // string
  DT_TENSOR = 14,   // tensor
  DT_GRAPH = 15,    // graph
  // list type
  DT_BOOLS = 16,     // list of bool
  DT_INTS8 = 17,     // list of int8_t
  DT_INTS16 = 18,    // list of int16_t
  DT_INTS32 = 19,    // list of int32_t
  DT_INTS64 = 20,    // list of int64_t
  DT_UINTS8 = 21,    // list of uint8_t
  DT_UINTS16 = 22,   // list of uint16_t
  DT_UINTS32 = 23,   // list of uint32_t
  DT_UINTS64 = 24,   // list of uint64_t
  DT_FLOATS16 = 25,  // list of float16
  DT_FLOATS32 = 26,  // list of float32
  DT_FLOATS64 = 27,  // list of float64
  DT_STRINGS = 28,   // list of string
  DT_TENSORS = 29,   // list of tensor
  DT_GRAPHS = 30,    // list of graph
  DT_TUPLE = 31,     // tuple
  DT_LIST = 32,      // list
  DT_DICT = 33,      // dictionary
  // other types
  DT_NONE = 34,      // None
  DT_SYM_INST = 35,  // Symbolic Key Instance
  // type related type
  DT_BASE_INT = 36,    // type generic int
  DT_BASE_UINT = 37,   // type generate unsigned int
  DT_BASE_FLOAT = 38,  // type generate float
  DT_TYPE = 39,        // type type
  DT_ANYTHING = 40,    // type anything
  DT_REFKEY = 41,      // type refkey
  DT_REF = 42          // type ref
} DbgDataType;
| class TensorData { | |||
| public: | |||
| TensorData() : slot(0), execution_order(-1) {} | |||
| TensorData(const TensorData &obj) { | |||
| std::cout << "Copy Constructor" << std::endl; | |||
| MS_LOG(INFO) << "Copy Constructor"; | |||
| this->name = obj.name; | |||
| this->execution_order = obj.execution_order; | |||
| this->slot = obj.slot; | |||
| this->data_ptr = obj.data_ptr; | |||
| this->size = obj.size; | |||
| this->data_type = obj.data_type; | |||
| this->data_type_size = obj.data_type_size; | |||
| this->shape = obj.shape; | |||
| this->iteration = obj.iteration; | |||
| this->device_id = obj.device_id; | |||
| #ifdef ONLINE_DBG_MODE | |||
| this->tensor_ptr = obj.tensor_ptr; | |||
| #endif | |||
| } | |||
| ~TensorData() {} | |||
| std::string GetName() { return this->name; } | |||
| mindspore::tensor::TensorPtr GetTensor() { return this->tensor_ptr; } | |||
| size_t GetSlot() { return this->slot; } | |||
| int GetExecutionOrder() { return this->execution_order; } | |||
| @@ -55,9 +188,179 @@ class TensorData { | |||
| void SetName(const std::string &name) { this->name = name; } | |||
| #ifdef ONLINE_DBG_MODE | |||
| void SetTensor(mindspore::tensor::TensorPtr out_tensor) { this->tensor_ptr = out_tensor; } | |||
| #endif | |||
| void SetSlot(size_t slot) { this->slot = slot; } | |||
| char *GetDataPtr() { return data_ptr; } | |||
| void SetDataPtr(char *data_ptr) { this->data_ptr = data_ptr; } | |||
| uint32_t GetNumElements() { return size / data_type_size; } | |||
| uint64_t GetByteSize() { return size; } | |||
| void SetByteSize(uint64_t size) { this->size = size; } | |||
| std::vector<int64_t> GetShape() { return shape; } | |||
| void SetShape(std::vector<int64_t> shape) { this->shape = shape; } | |||
| unsigned int GetIteration() { return iteration; } | |||
| void SetIteration(unsigned int iteration) { this->iteration = iteration; } | |||
| unsigned int GetDeviceId() { return device_id; } | |||
| void SetDeviceId(unsigned int device_id) { this->device_id = device_id; } | |||
| unsigned int GetRootGraphId() { return root_graph_id; } | |||
| void SetRootGraphId(unsigned int root_graph_id) { this->root_graph_id = root_graph_id; } | |||
| DbgDataType GetType() { return data_type; } | |||
| void SetType(unsigned int type) { ConvertMsToDbgType(type); } | |||
| void SetType(std::string type_name) { ConvertStringToDbgType(type_name); } | |||
| void ConvertMsToDbgType(uint32_t type) { | |||
| switch (type) { | |||
| case MsTypeId::kNumberTypeBool: | |||
| this->data_type = DbgDataType::DT_BOOL; | |||
| this->data_type_size = 1; | |||
| break; | |||
| case MsTypeId::kNumberTypeInt8: | |||
| this->data_type = DbgDataType::DT_INT8; | |||
| this->data_type_size = 1; | |||
| break; | |||
| case MsTypeId::kNumberTypeInt16: | |||
| this->data_type = DbgDataType::DT_INT16; | |||
| this->data_type_size = 2; | |||
| break; | |||
| case MsTypeId::kNumberTypeInt32: | |||
| this->data_type = DbgDataType::DT_INT32; | |||
| this->data_type_size = 4; | |||
| break; | |||
| case MsTypeId::kNumberTypeInt64: | |||
| this->data_type = DbgDataType::DT_INT64; | |||
| this->data_type_size = 8; | |||
| break; | |||
| case MsTypeId::kNumberTypeUInt8: | |||
| this->data_type = DbgDataType::DT_UINT8; | |||
| this->data_type_size = 1; | |||
| break; | |||
| case MsTypeId::kNumberTypeUInt16: | |||
| this->data_type = DbgDataType::DT_UINT16; | |||
| this->data_type_size = 2; | |||
| break; | |||
| case MsTypeId::kNumberTypeUInt32: | |||
| this->data_type = DbgDataType::DT_UINT32; | |||
| this->data_type_size = 4; | |||
| break; | |||
| case MsTypeId::kNumberTypeUInt64: | |||
| this->data_type = DbgDataType::DT_UINT64; | |||
| this->data_type_size = 8; | |||
| break; | |||
| case MsTypeId::kNumberTypeFloat16: | |||
| this->data_type = DbgDataType::DT_FLOAT16; | |||
| this->data_type_size = 2; | |||
| break; | |||
| case MsTypeId::kNumberTypeFloat32: | |||
| this->data_type = DbgDataType::DT_FLOAT32; | |||
| this->data_type_size = 4; | |||
| break; | |||
| case MsTypeId::kNumberTypeFloat64: | |||
| this->data_type = DbgDataType::DT_FLOAT64; | |||
| this->data_type_size = 8; | |||
| break; | |||
| case MsTypeId::kNumberTypeInt: | |||
| this->data_type = DbgDataType::DT_BASE_INT; | |||
| this->data_type_size = 4; | |||
| break; | |||
| case MsTypeId::kNumberTypeUInt: | |||
| this->data_type = DbgDataType::DT_BASE_UINT; | |||
| this->data_type_size = 4; | |||
| break; | |||
| case MsTypeId::kNumberTypeFloat: | |||
| this->data_type = DbgDataType::DT_BASE_FLOAT; | |||
| this->data_type_size = 4; | |||
| break; | |||
| default: | |||
| MS_LOG(EXCEPTION) << "Unexpected type id: " << type; | |||
| } | |||
| } | |||
| void ConvertStringToDbgType(const std::string &type_name) { | |||
| std::string type_name_lower = type_name; | |||
| std::string trans_true_prefix = "kNumberType"; | |||
| if (type_name.find(trans_true_prefix) == 0) { | |||
| type_name_lower = type_name.substr(trans_true_prefix.length()); | |||
| } | |||
| (void)std::transform(type_name_lower.begin(), type_name_lower.end(), type_name_lower.begin(), ::tolower); | |||
| if (type_name_lower == "bool") { | |||
| this->data_type = DbgDataType::DT_BOOL; | |||
| this->data_type_size = 1; | |||
| } else if (type_name_lower == "int8") { | |||
| this->data_type = DbgDataType::DT_INT8; | |||
| this->data_type_size = 1; | |||
| } else if (type_name_lower == "int16") { | |||
| this->data_type = DbgDataType::DT_INT16; | |||
| this->data_type_size = 2; | |||
| } else if (type_name_lower == "int32") { | |||
| this->data_type = DbgDataType::DT_INT32; | |||
| this->data_type_size = 4; | |||
| } else if (type_name_lower == "int64") { | |||
| this->data_type = DbgDataType::DT_INT64; | |||
| this->data_type_size = 8; | |||
| } else if (type_name_lower == "uint8") { | |||
| this->data_type = DbgDataType::DT_UINT8; | |||
| this->data_type_size = 1; | |||
| } else if (type_name_lower == "uint16") { | |||
| this->data_type = DbgDataType::DT_UINT16; | |||
| this->data_type_size = 2; | |||
| } else if (type_name_lower == "uint32") { | |||
| this->data_type = DbgDataType::DT_UINT32; | |||
| this->data_type_size = 4; | |||
| } else if (type_name_lower == "uint64") { | |||
| this->data_type = DbgDataType::DT_UINT64; | |||
| this->data_type_size = 8; | |||
| } else if (type_name_lower == "float16") { | |||
| this->data_type = DbgDataType::DT_FLOAT16; | |||
| this->data_type_size = 2; | |||
| } else if (type_name_lower == "float32") { | |||
| this->data_type = DbgDataType::DT_FLOAT32; | |||
| this->data_type_size = 4; | |||
| } else if (type_name_lower == "float64") { | |||
| this->data_type = DbgDataType::DT_FLOAT64; | |||
| this->data_type_size = 8; | |||
| } else if (type_name_lower == "") { | |||
| this->data_type = DbgDataType::DT_UNDEFINED; | |||
| this->data_type_size = 0; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Unexpected type name: " << type_name; | |||
| } | |||
| } | |||
| private: | |||
| char *data_ptr; // pointer to the pre-allocated memory | |||
| uint64_t size; // size in bytes | |||
| DbgDataType data_type; // internal debugger type | |||
| unsigned int data_type_size; | |||
| std::vector<int64_t> shape; | |||
| std::string name; | |||
| uint64_t slot; | |||
| unsigned int iteration; | |||
| unsigned int device_id; | |||
| unsigned int root_graph_id; | |||
| int execution_order; | |||
| #ifdef ONLINE_DBG_MODE | |||
| mindspore::tensor::TensorPtr tensor_ptr; | |||
| #endif | |||
| }; | |||
| #ifdef ONLINE_DBG_MODE | |||
| } // namespace mindspore | |||
| #endif | |||
| #endif // MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_ | |||
| @@ -23,10 +23,14 @@ | |||
| #include <tuple> | |||
| #include <string> | |||
| #include <utility> | |||
| #ifdef OFFLINE_DBG_MODE | |||
| #include "debugger/offline_debug/offline_logger.h" | |||
| #endif | |||
| #include "debug/tensor_data.h" | |||
| #ifdef ONLINE_DBG_MODE | |||
| #include "debug/data_dump/dump_json_parser.h" | |||
| #include "ir/dtype.h" | |||
| namespace mindspore { | |||
| #endif | |||
| class TensorLoader { | |||
| public: | |||
| TensorLoader() : iter_num(-1) {} | |||
| @@ -152,9 +156,10 @@ class TensorLoader { | |||
| void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; } | |||
| #ifdef ONLINE_DBG_MODE | |||
| bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath, | |||
| const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type, | |||
| TypeId addr_type_id, const std::string &addr_format, size_t slot) const { | |||
| TypeId addr_type_id, const std::string &addr_format, size_t slot) { | |||
| if (filepath.empty()) { | |||
| MS_LOG(ERROR) << "Dump file path is null!"; | |||
| return false; | |||
| @@ -181,21 +186,24 @@ class TensorLoader { | |||
| auto iter = tensor_list_map.find(tensor_loader_name); | |||
| if (iter != tensor_list_map.end()) { | |||
| std::shared_ptr<TensorData> node = iter->second; | |||
| mindspore::tensor::TensorPtr out_tensor = node->GetTensor(); | |||
| size_t host_size = out_tensor->data().nbytes(); | |||
| size_t host_size = node->GetByteSize(); | |||
| return DumpJsonParser::DumpToFile(path, out_tensor->data_c(), host_size); | |||
| return DumpJsonParser::DumpToFile(path, node->GetDataPtr(), host_size); | |||
| } | |||
| MS_LOG(INFO) << "Tensor name:" << tensor_name << " not found in tensor_list_map"; | |||
| return true; | |||
| } | |||
| #endif | |||
| private: | |||
| // the pair is (device_id, iteration) | |||
| std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map; | |||
| std::multimap<std::string, std::shared_ptr<TensorData>> node_tensor_map; | |||
| std::map<std::string, std::shared_ptr<TensorData>> prev_tensor_list_map; | |||
| uint32_t iter_num; | |||
| std::mutex lock_; | |||
| }; | |||
| #ifdef ONLINE_DBG_MODE | |||
| } // namespace mindspore | |||
| #endif | |||
| #endif // MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_ | |||
| @@ -713,6 +713,10 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec | |||
| } | |||
| MS_LOG(INFO) << "E2E tensor name is " << tensor_name; | |||
| tensor_data->SetTensor(out_tensor); | |||
| tensor_data->SetDataPtr(static_cast<char *>(out_tensor->data_c())); | |||
| tensor_data->SetByteSize(out_tensor->data().nbytes()); | |||
| tensor_data->SetType((unsigned int)host_type); | |||
| tensor_data->SetShape(out_tensor->shape()); | |||
| ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev); | |||
| return ret; | |||
| } | |||
| @@ -93,7 +93,7 @@ void GPUDeviceAddress::ClearDeviceMemory() { | |||
| } | |||
| GPUDeviceAddress::~GPUDeviceAddress() { ClearDeviceMemory(); } | |||
| #ifdef ENABLE_DEBUGGER | |||
| bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, | |||
| bool keep_prev) const { | |||
| @@ -117,13 +117,16 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi | |||
| auto tensor_data = std::make_shared<mindspore::TensorData>(); | |||
| tensor_data->SetName(tensor_name); | |||
| tensor_data->SetExecutionOrder(execution_order); | |||
| tensor_data->SetTensor(out_tensor); | |||
| tensor_data->SetSlot(slot); | |||
| tensor_data->SetTensor(out_tensor); | |||
| tensor_data->SetDataPtr(static_cast<char *>(out_tensor->data_c())); | |||
| tensor_data->SetByteSize(out_tensor->data().nbytes()); | |||
| tensor_data->SetType((unsigned int)host_type); | |||
| tensor_data->SetShape(out_tensor->shape()); | |||
| ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev); | |||
| MS_LOG(INFO) << "E2E tensor name is " << tensor_name; | |||
| return ret; | |||
| } | |||
| #endif | |||
| } // namespace gpu | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -114,32 +114,33 @@ static int GetSlogLevel(MsLogLevel level) { | |||
| static const char *GetSubModuleName(SubModuleId module_id) { | |||
| static const char *sub_module_names[NUM_SUBMODUES] = { | |||
| "UNKNOWN", // SM_UNKNOWN | |||
| "CORE", // SM_CORE | |||
| "ANALYZER", // SM_ANALYZER | |||
| "COMMON", // SM_COMMON | |||
| "DEBUG", // SM_DEBUG | |||
| "DEVICE", // SM_DEVICE | |||
| "GE_ADPT", // SM_GE_ADPT | |||
| "IR", // SM_IR | |||
| "KERNEL", // SM_KERNEL | |||
| "MD", // SM_MD | |||
| "ME", // SM_ME | |||
| "EXPRESS", // SM_EXPRESS | |||
| "OPTIMIZER", // SM_OPTIMIZER | |||
| "PARALLEL", // SM_PARALLEL | |||
| "PARSER", // SM_PARSER | |||
| "PIPELINE", // SM_PIPELINE | |||
| "PRE_ACT", // SM_PRE_ACT | |||
| "PYNATIVE", // SM_PYNATIVE | |||
| "SESSION", // SM_SESSION | |||
| "UTILS", // SM_UTILS | |||
| "VM", // SM_VM | |||
| "PROFILER", // SM_PROFILER | |||
| "PS", // SM_PS | |||
| "LITE", // SM_LITE | |||
| "HCCL_ADPT", // SM_HCCL_ADPT | |||
| "MINDQUANTUM" // SM_MINDQUANTUM | |||
| "UNKNOWN", // SM_UNKNOWN | |||
| "CORE", // SM_CORE | |||
| "ANALYZER", // SM_ANALYZER | |||
| "COMMON", // SM_COMMON | |||
| "DEBUG", // SM_DEBUG | |||
| "OFFLINE_DEBUG", // SM_OFFLINE_DEBUG | |||
| "DEVICE", // SM_DEVICE | |||
| "GE_ADPT", // SM_GE_ADPT | |||
| "IR", // SM_IR | |||
| "KERNEL", // SM_KERNEL | |||
| "MD", // SM_MD | |||
| "ME", // SM_ME | |||
| "EXPRESS", // SM_EXPRESS | |||
| "OPTIMIZER", // SM_OPTIMIZER | |||
| "PARALLEL", // SM_PARALLEL | |||
| "PARSER", // SM_PARSER | |||
| "PIPELINE", // SM_PIPELINE | |||
| "PRE_ACT", // SM_PRE_ACT | |||
| "PYNATIVE", // SM_PYNATIVE | |||
| "SESSION", // SM_SESSION | |||
| "UTILS", // SM_UTILS | |||
| "VM", // SM_VM | |||
| "PROFILER", // SM_PROFILER | |||
| "PS", // SM_PS | |||
| "LITE", // SM_LITE | |||
| "HCCL_ADPT", // SM_HCCL_ADPT | |||
| "MINDQUANTUM" // SM_MINDQUANTUM | |||
| }; | |||
| return sub_module_names[module_id % NUM_SUBMODUES]; | |||
| @@ -111,6 +111,7 @@ enum SubModuleId : int { | |||
| SM_ANALYZER, // static analyzer | |||
| SM_COMMON, // common | |||
| SM_DEBUG, // debug | |||
| SM_OFFLINE_DEBUG, // offline debug | |||
| SM_DEVICE, // device | |||
| SM_GE_ADPT, // ge adapter | |||
| SM_IR, // IR | |||
| @@ -0,0 +1,21 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| """ | |||
| This module provides APIs to load and process dump data, i.e. read tensors, check | |||
| for watchpoints and other debugging services. | |||
| """ | |||
| from . import dbg_services | |||
| from . import mi_validator_helpers | |||
| from . import mi_validators | |||
| @@ -0,0 +1,870 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| The module DbgServices provides offline debugger APIs. | |||
| """ | |||
| import mindspore._mindspore_offline_debug as cds | |||
| from mindspore.offline_debug.mi_validators import check_init, check_initialize, check_add_watchpoint, check_remove_watchpoint, check_check_watchpoints, check_read_tensors, check_initialize_done, check_tensor_info_init, check_tensor_data_init, check_watchpoint_hit_init, check_parameter_init | |||
def get_version():
    """
    Return the offline Debug Services version string.

    Returns:
        version (str): dbgServices version.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> version = dbg_services.get_version()
    """
    # A throw-away non-verbose binding instance is enough to query the version.
    dbg_instance = cds.DbgServices(False)
    return dbg_instance.GetVersion()
class DbgLogger:
    """
    Offline Debug Services logger.

    Args:
        verbose (bool): whether to print logs.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> version = dbg_services.DbgLogger(verbose=False)
    """

    def __init__(self, verbose):
        self.verbose = verbose

    def __call__(self, *logs):
        # Swallow messages entirely unless verbose logging was requested.
        if not self.verbose:
            return
        print(logs)
| log = DbgLogger(False) | |||
| class DbgServices(): | |||
| """ | |||
| Offline Debug Services class. | |||
| Args: | |||
| dump_file_path (str): directory where the dump files are saved. | |||
        verbose (bool): whether to print logs (default: False).
| Examples: | |||
| >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services | |||
| >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path", | |||
| >>> verbose=True) | |||
| """ | |||
    @check_init
    def __init__(self, dump_file_path, verbose=False):
        """Wrap the C++ cds.DbgServices binding for the given dump directory.

        Args:
            dump_file_path (str): directory where the dump files are saved.
            verbose (bool): whether to print logs (default: False).
        """
        log.verbose = verbose
        log("in Python __init__, file path is ", dump_file_path)
        self.dump_file_path = dump_file_path
        self.dbg_instance = cds.DbgServices(verbose)
        self.version = self.dbg_instance.GetVersion()
        self.verbose = verbose
        # Flipped to True by initialize(); enforced by @check_initialize_done.
        self.initialized = False
    @check_initialize
    def initialize(self, net_name, is_sync_mode=True):
        """
        Initialize Debug Service.

        Args:
            net_name (str): Network name.
            is_sync_mode (bool): Whether to process synchronous or asynchronous dump files mode
                (default: True (synchronous)).

        Returns:
            Initialized Debug Service instance.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
            >>>                              verbose=True)
            >>> d_init = d.initialize(net_name="network name", is_sync_mode=True)
        """
        log("in Python Initialize dump_file_path ", self.dump_file_path)
        # NOTE(review): the flag is set before the C++ Initialize call; if that
        # call raises, the instance stays marked initialized — confirm intended.
        self.initialized = True
        return self.dbg_instance.Initialize(net_name, self.dump_file_path, is_sync_mode)
    @check_initialize_done
    @check_add_watchpoint
    def add_watchpoint(self, watchpoint_id, watch_condition, check_node_list, parameter_list):
        """
        Adding watchpoint to Debug Service instance.

        Args:
            watchpoint_id (int): Watchpoint id.
            watch_condition (int): A representation of the condition to be checked.
            check_node_list (dict): Dictionary of node names (str or '*' to check all nodes) as key,
                mapping to device_id (list of ints or '*' to check all devices),
                root_graph_id (list of ints or '*' to check all graphs) and is_parameter (bool).
            parameter_list (list): List of parameters in watchpoint. Parameters should be instances
                of Parameter class. Each parameter describes the value to be checked in watchpoint.

        Returns:
            Debug Service instance with added watchpoint.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
            >>>                              verbose=True)
            >>> d_init = d.initialize(is_sync_mode=True)
            >>> d_wp = d_init.add_watchpoint(watchpoint_id=1,
            >>>                              watch_condition=6,
            >>>                              check_node_list={"conv2.bias" : {"device_id": [0],
            >>>                                               "root_graph_id": [0], "is_parameter": True}},
            >>>                              parameter_list=[dbg_services.Parameter(name="param",
            >>>                                                                    disabled=False,
            >>>                                                                    value=0.0,
            >>>                                                                    hit=False,
            >>>                                                                    actual_value=0.0)])
        """
        log("in Python AddWatchpoint")
        # Normalize device_id / root_graph_id entries into lists of strings for
        # the C++ binding; the '*' wildcard is preserved as ["*"].
        # NOTE(review): this mutates the caller's check_node_list in place —
        # confirm intended.
        for node_name, node_info in check_node_list.items():
            for info_name, info_param in node_info.items():
                if info_name in ["device_id", "root_graph_id"]:
                    if info_param in ["*"]:
                        check_node_list[node_name][info_name] = ["*"]
                    else:
                        check_node_list[node_name][info_name] = list(map(str, info_param))
        # Unwrap the Python Parameter wrappers down to their pybind instances.
        parameter_list_inst = []
        for elem in parameter_list:
            parameter_list_inst.append(elem.instance)
        return self.dbg_instance.AddWatchpoint(watchpoint_id, watch_condition, check_node_list, parameter_list_inst)
    @check_initialize_done
    @check_remove_watchpoint
    def remove_watchpoint(self, watchpoint_id):
        """
        Removing watchpoint from Debug Service instance.

        Args:
            watchpoint_id (int): Watchpoint id.

        Returns:
            Debug Service instance with removed watchpoint.

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path",
            >>>                              verbose=True)
            >>> d_init = d.initialize(is_sync_mode=True)
            >>> d_wp = d_init.add_watchpoint(watchpoint_id=1,
            >>>                              watch_condition=6,
            >>>                              check_node_list={"conv2.bias" : {"device_id": [5],
            >>>                                               "root_graph_id": [0], "is_parameter": True}},
            >>>                              parameter_list=[dbg_services.Parameter(name="param",
            >>>                                                                    disabled=False,
            >>>                                                                    value=0.0,
            >>>                                                                    hit=False,
            >>>                                                                    actual_value=0.0)])
            >>> d_wp = d_wp.remove_watchpoint(watchpoint_id=1)
        """
        log("in Python Remove Watchpoint id ", watchpoint_id)
        return self.dbg_instance.RemoveWatchpoint(watchpoint_id)
| @check_initialize_done | |||
| @check_check_watchpoints | |||
| def check_watchpoints(self, iteration): | |||
| """ | |||
| Checking watchpoint at given iteration. | |||
| Args: | |||
| iteration (int): Watchpoint check iteration. | |||
| Returns: | |||
| Watchpoint hit list. | |||
| Examples: | |||
| >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services | |||
| >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path", | |||
| >>> verbose=True) | |||
| >>> d_init = d.initialize(is_sync_mode=True) | |||
| >>> d_wp = d_init.add_watchpoint(id=1, | |||
| >>> watch_condition=6, | |||
| >>> check_node_list={"conv2.bias" : {"device_id": [5], | |||
| root_graph_id: [0], "is_parameter": True}}, | |||
| >>> parameter_list=[dbg_services.Parameter(name="param", | |||
| >>> disabled=False, | |||
| >>> value=0.0, | |||
| >>> hit=False, | |||
| >>> actual_value=0.0)]) | |||
| >>> watchpoints = d_wp.check_watchpoints(iteration=8) | |||
| """ | |||
| log("in Python CheckWatchpoints iteration ", iteration) | |||
| watchpoint_list = self.dbg_instance.CheckWatchpoints(iteration) | |||
| watchpoint_hit_list = [] | |||
| for watchpoint in watchpoint_list: | |||
| name = watchpoint.get_name() | |||
| slot = watchpoint.get_slot() | |||
| condition = watchpoint.get_condition() | |||
| watchpoint_id = watchpoint.get_watchpoint_id() | |||
| parameters = watchpoint.get_parameters() | |||
| error_code = watchpoint.get_error_code() | |||
| device_id = watchpoint.get_device_id() | |||
| root_graph_id = watchpoint.get_root_graph_id() | |||
| param_list = [] | |||
| for param in parameters: | |||
| p_name = param.get_name() | |||
| disabled = param.get_disabled() | |||
| value = param.get_value() | |||
| hit = param.get_hit() | |||
| actual_value = param.get_actual_value() | |||
| param_list.append(Parameter(p_name, disabled, value, hit, actual_value)) | |||
| watchpoint_hit_list.append(WatchpointHit(name, slot, condition, watchpoint_id, | |||
| param_list, error_code, device_id, root_graph_id)) | |||
| return watchpoint_hit_list | |||
| @check_initialize_done | |||
| @check_read_tensors | |||
| def read_tensors(self, info): | |||
| """ | |||
| Returning tensor data object describing the tensor requested tensor. | |||
| Args: | |||
| info (list): List of TensorInfo objects. | |||
| Returns: | |||
| TensorData list (list). | |||
| Examples: | |||
| >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services | |||
| >>> d = dbg_services.DbgServices(dump_file_path="dump_file_path", | |||
| >>> verbose=True) | |||
| >>> d_init = d.initialize(is_sync_mode=True) | |||
| >>> tensor_data_list = d_init.read_tensors([dbg_services.TensorInfo(node_name="conv2.bias", | |||
| >>> slot=0, | |||
| >>> iteration=8, | |||
| >>> device_id=5, | |||
| >>> root_graph_id=0, | |||
| >>> is_parameter=True)]) | |||
| """ | |||
| log("in Python ReadTensors info ", info) | |||
| info_list_inst = [] | |||
| for elem in info: | |||
| log("in Python ReadTensors info ", info) | |||
| info_list_inst.append(elem.instance) | |||
| tensor_data_list = self.dbg_instance.ReadTensors(info_list_inst) | |||
| tensor_data_list_ret = [] | |||
| for elem in tensor_data_list: | |||
| if elem.get_data_size() == 0: | |||
| tensor_data = TensorData(b'', elem.get_data_size(), elem.get_dtype(), elem.get_shape()) | |||
| else: | |||
| tensor_data = TensorData(elem.get_data_ptr(), elem.get_data_size(), elem.get_dtype(), elem.get_shape()) | |||
| tensor_data_list_ret.append(tensor_data) | |||
| return tensor_data_list_ret | |||
class TensorInfo():
    """
    Tensor Information class.

    Args:
        node_name (str): Fully qualified name of the desired node.
        slot (int): The particular output for the requested node.
        iteration (int): The desired iteration to gather tensor information.
        device_id (int): The desired device id to gather tensor information.
        root_graph_id (int): The root graph id of the graph containing the node.
        is_parameter (bool): Whether node is a parameter (input, constant, bias, parameter).

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> tensor_info = dbg_services.TensorInfo(node_name="conv2.bias",
        >>>                                       slot=0,
        >>>                                       iteration=8,
        >>>                                       device_id=5,
        >>>                                       root_graph_id=0,
        >>>                                       is_parameter=True)
    """

    @check_tensor_info_init
    def __init__(self, node_name, slot, iteration, device_id, root_graph_id, is_parameter):
        # Wrap the pybind-exported tensor_info object; all properties below
        # delegate to this instance.
        self.instance = cds.tensor_info(node_name, slot, iteration, device_id, root_graph_id, is_parameter)

    @property
    def node_name(self):
        """node_name of the TensorInfo instance (str)."""
        return self.instance.get_node_name()

    @property
    def slot(self):
        """slot of the TensorInfo instance (int)."""
        return self.instance.get_slot()

    @property
    def iteration(self):
        """iteration of the TensorInfo instance (int)."""
        return self.instance.get_iteration()

    @property
    def device_id(self):
        """device_id of the TensorInfo instance (int)."""
        # BUG FIX: the original property body contained only a docstring and
        # no return statement, so `tensor_info.device_id` always returned None.
        return self.instance.get_device_id()

    @property
    def root_graph_id(self):
        """root_graph_id of the TensorInfo instance (int)."""
        return self.instance.get_root_graph_id()

    @property
    def is_parameter(self):
        """is_parameter of the TensorInfo instance (bool)."""
        return self.instance.get_is_parameter()
class TensorData():
    """
    TensorData class.

    Args:
        data_ptr (byte): Data pointer.
        data_size (int): Size of data in bytes.
        dtype (int): An encoding representing the type of TensorData.
        shape (list): Shape of tensor.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> tensor_data = dbg_services.TensorData(data_ptr=b'\xba\xd0\xba\xd0',
        >>>                                       data_size=4,
        >>>                                       dtype=0,
        >>>                                       shape=[2, 2])
    """

    @check_tensor_data_init
    def __init__(self, data_ptr, data_size, dtype, shape):
        # Delegate storage to the pybind-exported tensor_data object.
        self.instance = cds.tensor_data(data_ptr, data_size, dtype, shape)

    @property
    def data_ptr(self):
        """Raw bytes backing the tensor data (byte)."""
        return self.instance.get_data_ptr()

    @property
    def data_size(self):
        """Number of bytes referenced by data_ptr (int)."""
        return self.instance.get_data_size()

    @property
    def dtype(self):
        """Encoded element type of the tensor (int)."""
        return self.instance.get_dtype()

    @property
    def shape(self):
        """Dimensions of the tensor (list)."""
        return self.instance.get_shape()
class WatchpointHit():
    """
    WatchpointHit class.

    Args:
        name (str): Name of WatchpointHit instance.
        slot (int): The numerical label of an output.
        condition (int): A representation of the condition to be checked.
        watchpoint_id (int): Watchpoint id.
        parameters (list): A list of all parameters for WatchpointHit instance.
            Parameters have to be instances of Parameter class.
        error_code (int): An explanation of certain scenarios where watchpoint could not be checked.
        device_id (int): Device id where the watchpoint is hit.
        root_graph_id (int): Root graph id where the watchpoint is hit.

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> watchpoint_hit = dbg_services.WatchpointHit(name="hit1",
        >>>                                             slot=1,
        >>>                                             condition=2,
        >>>                                             watchpoint_id=3,
        >>>                                             parameters=[param1, param2],
        >>>                                             error_code=0,
        >>>                                             device_id=1,
        >>>                                             root_graph_id=1)
    """

    @check_watchpoint_hit_init
    def __init__(self, name, slot, condition, watchpoint_id, parameters, error_code, device_id, root_graph_id):
        # Unwrap the pybind instances backing each Parameter before handing
        # them to the exported watchpoint_hit constructor.
        underlying_params = [p.instance for p in parameters]
        self.instance = cds.watchpoint_hit(name, slot, condition, watchpoint_id,
                                           underlying_params, error_code, device_id, root_graph_id)

    @property
    def name(self):
        """name of the WatchpointHit instance (str)."""
        return self.instance.get_name()

    @property
    def slot(self):
        """slot of the WatchpointHit instance (int)."""
        return self.instance.get_slot()

    @property
    def condition(self):
        """condition of the WatchpointHit instance (int)."""
        return self.instance.get_condition()

    @property
    def watchpoint_id(self):
        """watchpoint_id of the WatchpointHit instance (int)."""
        return self.instance.get_watchpoint_id()

    @property
    def parameters(self):
        """List of Parameter objects attached to this hit (list)."""
        return [Parameter(p.get_name(),
                          p.get_disabled(),
                          p.get_value(),
                          p.get_hit(),
                          p.get_actual_value())
                for p in self.instance.get_parameters()]

    @property
    def error_code(self):
        """error_code of the WatchpointHit instance (int)."""
        return self.instance.get_error_code()

    @property
    def device_id(self):
        """device_id of the WatchpointHit instance (int)."""
        return self.instance.get_device_id()

    @property
    def root_graph_id(self):
        """root_graph_id of the WatchpointHit instance (int)."""
        return self.instance.get_root_graph_id()
class Parameter():
    """
    Parameter class.

    Args:
        name (str): Name of the parameter.
        disabled (bool): Whether parameter is used in backend.
        value (float): Threshold value of the parameter.
        hit (bool): Whether this parameter triggered watchpoint (default is False).
        actual_value (float): Actual value of the parameter (default is 0.0).

    Examples:
        >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
        >>> parameter = dbg_services.Parameter(name="param",
        >>>                                    disabled=False,
        >>>                                    value=0.0,
        >>>                                    hit=False,
        >>>                                    actual_value=0.0)
    """

    @check_parameter_init
    def __init__(self, name, disabled, value, hit=False, actual_value=0.0):
        # Wrap the pybind-exported parameter object; properties below delegate
        # to this instance.
        self.instance = cds.parameter(name, disabled, value, hit, actual_value)

    @property
    def name(self):
        """
        Function to receive Parameter name.

        Returns:
            name of Parameter instance (str).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> name = parameter.name
        """
        return self.instance.get_name()

    @property
    def disabled(self):
        """
        Function to receive Parameter disabled value.

        Returns:
            disabled of Parameter instance (bool).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> disabled = parameter.disabled
        """
        return self.instance.get_disabled()

    @property
    def value(self):
        """
        Function to receive Parameter value.

        Returns:
            value of Parameter instance (float).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> value = parameter.value
        """
        return self.instance.get_value()

    @property
    def hit(self):
        """
        Function to receive Parameter hit value.

        Returns:
            hit of Parameter instance (bool).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> hit = parameter.hit
        """
        return self.instance.get_hit()

    @property
    def actual_value(self):
        """
        Function to receive Parameter actual_value value.

        Returns:
            actual_value of Parameter instance (float).

        Examples:
            >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
            >>> parameter = dbg_services.Parameter(name="param",
            >>>                                    disabled=False,
            >>>                                    value=0.0,
            >>>                                    hit=False,
            >>>                                    actual_value=0.0)
            >>> actual_value = parameter.actual_value
        """
        return self.instance.get_actual_value()
| @@ -0,0 +1,123 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| General Validator Helper Functions. | |||
| """ | |||
| import os | |||
| import inspect | |||
# Inclusive bounds for the unsigned integer ranges accepted by the validators.
UINT32_MAX = 4294967295
UINT32_MIN = 0
UINT64_MAX = 18446744073709551615
UINT64_MIN = 0


def pad_arg_name(arg_name):
    """Append a trailing space to a non-empty argument name so it reads
    naturally when spliced into an error message."""
    return arg_name + " " if arg_name != "" else arg_name
def check_value(arg, valid_range, arg_name=""):
    """Raise ValueError unless `arg` lies in the inclusive interval
    [valid_range[0], valid_range[1]]; `arg_name` labels the error message."""
    # Inline padding of the argument name (trailing space for readability).
    if arg_name != "":
        arg_name = arg_name + " "
    low, high = valid_range[0], valid_range[1]
    if not low <= arg <= high:
        raise ValueError(
            "Input {0}is not within the required interval of ({1} to {2}).".format(arg_name, low, high))
def check_uint32(arg, arg_name=""):
    """Validate that `arg` is an int inside the unsigned 32-bit range.

    BUG FIX: `arg_name` was previously dropped when delegating to
    check_value, so out-of-range errors never named the offending argument.
    """
    type_check(arg, (int,), arg_name)
    check_value(arg, [UINT32_MIN, UINT32_MAX], arg_name)
def check_uint64(arg, arg_name=""):
    """Validate that `arg` is an int inside the unsigned 64-bit range.

    BUG FIX: `arg_name` was previously dropped when delegating to
    check_value, so out-of-range errors never named the offending argument.
    """
    type_check(arg, (int,), arg_name)
    check_value(arg, [UINT64_MIN, UINT64_MAX], arg_name)
def check_dir(dataset_dir):
    """Raise ValueError unless `dataset_dir` is an existing, readable directory."""
    is_readable_dir = os.path.isdir(dataset_dir) and os.access(dataset_dir, os.R_OK)
    if not is_readable_dir:
        raise ValueError("The folder {} does not exist or permission denied!".format(dataset_dir))
def parse_user_args(method, *args, **kwargs):
    """
    Bind user arguments against a callable's signature.

    Args:
        method (method): a callable function.
        args: user passed args.
        kwargs: user passed kwargs.

    Returns:
        list: values the user passed (defaults applied) for each parameter.
        OrderedDict: full parameter-to-argument mapping from the binding.
    """
    signature = inspect.signature(method)
    param_names = list(signature.parameters.keys())
    if 'self' in signature.parameters or 'cls' in signature.parameters:
        # Bound-method style: the callable itself stands in for self/cls,
        # and the leading parameter is excluded from the returned values.
        bound = signature.bind(method, *args, **kwargs)
        param_names = param_names[1:]
    else:
        bound = signature.bind(*args, **kwargs)
    bound.apply_defaults()
    filled = [bound.arguments.get(name) for name in param_names]
    return filled, bound.arguments
def type_check(arg, types, arg_name):
    """
    Raise TypeError unless `arg` is an instance of one of `types`.

    Args:
        arg (Any): any variable.
        types (tuple): tuple of all valid types for arg.
        arg_name (str): the name of arg.

    Returns:
        Exception: when the type is not correct, otherwise nothing.
    """
    # Empty strings are rendered as "" so the message stays readable.
    display = '\"\"' if repr(arg) == repr('') else arg
    # bool is a subclass of int: reject booleans when int is accepted but
    # bool is not explicitly listed.
    bool_masquerading_as_int = int in types and bool not in types and isinstance(arg, bool)
    if bool_masquerading_as_int or not isinstance(arg, types):
        raise TypeError("Argument {0} with value {1} is not of type {2}.".format(arg_name, display, types))
def type_check_list(args, types, arg_names):
    """
    Type-check every element of a list/tuple.

    Args:
        args (Union[list, tuple]): a list or tuple of any variable.
        types (tuple): tuple of all valid types for each element.
        arg_names (Union[list, tuple of str, str]): per-element names, or a
            single base name expanded to "name[i]".

    Returns:
        Exception: when a type is not correct, otherwise nothing.
    """
    type_check(args, (list, tuple,), arg_names)
    names_given_as_str = isinstance(arg_names, str)
    if not names_given_as_str and len(args) != len(arg_names):
        raise ValueError("List of arguments is not the same length as argument_names.")
    if names_given_as_str:
        arg_names = ["{0}[{1}]".format(arg_names, i) for i in range(len(args))]
    for element, element_name in zip(args, arg_names):
        type_check(element, types, element_name)
| @@ -0,0 +1,231 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Validator Functions for Offline Debugger APIs. | |||
| """ | |||
| from functools import wraps | |||
| import mindspore.offline_debug.dbg_services as cds | |||
| from mindspore.offline_debug.mi_validator_helpers import parse_user_args, type_check, type_check_list, check_dir, check_uint32, check_uint64 | |||
def check_init(method):
    """Decorator validating the DbgServices constructor arguments
    (dump_file_path must be a readable directory, verbose a bool)."""
    @wraps(method)
    def wrapper(self, *args, **kwargs):
        (dump_file_path, verbose), _ = parse_user_args(method, *args, **kwargs)
        type_check(dump_file_path, (str,), "dump_file_path")
        type_check(verbose, (bool,), "verbose")
        check_dir(dump_file_path)
        return method(self, *args, **kwargs)
    return wrapper
def check_initialize(method):
    """Decorator validating the DbgServices Initialize arguments
    (net_name must be a str, is_sync_mode a bool)."""
    @wraps(method)
    def wrapper(self, *args, **kwargs):
        (net_name, is_sync_mode), _ = parse_user_args(method, *args, **kwargs)
        type_check(net_name, (str,), "net_name")
        type_check(is_sync_mode, (bool,), "is_sync_mode")
        return method(self, *args, **kwargs)
    return wrapper
def check_add_watchpoint(method):
    """Wrapper method to check the parameters of DbgServices AddWatchpoint.

    Validates the watchpoint id, condition, the per-node info dict
    (device_id / root_graph_id / is_parameter) and the parameter list.
    """
    @wraps(method)
    def new_method(self, *args, **kwargs):
        [id_value, watch_condition, check_node_list, parameter_list], _ = parse_user_args(method, *args, **kwargs)
        check_uint32(id_value, "id")
        check_uint32(watch_condition, "watch_condition")
        type_check(check_node_list, (dict,), "check_node_list")
        for node_name, node_info in check_node_list.items():
            type_check(node_name, (str,), "node_name")
            type_check(node_info, (dict,), "node_info")
            for info_name, info_param in node_info.items():
                type_check(info_name, (str,), "node parameter name")
                # device_id and root_graph_id share identical validation:
                # either the "*" wildcard string or an iterable of uint32 ids.
                # (The original code duplicated this branch verbatim for each.)
                if info_name in ("device_id", "root_graph_id"):
                    if isinstance(info_param, str):
                        if info_param not in ["*"]:
                            raise ValueError("Node parameter {} only accepts '*' as string.".format(info_name))
                    else:
                        for param in info_param:
                            check_uint32(param, info_name)
                elif info_name in ["is_parameter"]:
                    type_check(info_param, (bool,), "is_parameter")
                else:
                    raise ValueError("Node parameter {} is not defined.".format(info_name))
        param_names = ["param_{0}".format(i) for i in range(len(parameter_list))]
        type_check_list(parameter_list, (cds.Parameter,), param_names)
        return method(self, *args, **kwargs)
    return new_method
def check_remove_watchpoint(method):
    """Decorator validating the DbgServices RemoveWatchpoint argument
    (the watchpoint id must be a uint32)."""
    @wraps(method)
    def wrapper(self, *args, **kwargs):
        (watchpoint_id,), _ = parse_user_args(method, *args, **kwargs)
        check_uint32(watchpoint_id, "id")
        return method(self, *args, **kwargs)
    return wrapper
def check_check_watchpoints(method):
    """Decorator validating the DbgServices CheckWatchpoints argument
    (iteration must be a uint32)."""
    @wraps(method)
    def wrapper(self, *args, **kwargs):
        (iteration,), _ = parse_user_args(method, *args, **kwargs)
        check_uint32(iteration, "iteration")
        return method(self, *args, **kwargs)
    return wrapper
def check_read_tensors(method):
    """Decorator validating the DbgServices ReadTensors argument
    (every element must be a TensorInfo)."""
    @wraps(method)
    def wrapper(self, *args, **kwargs):
        (info_list,), _ = parse_user_args(method, *args, **kwargs)
        labels = ["info_{0}".format(i) for i in range(len(info_list))]
        type_check_list(info_list, (cds.TensorInfo,), labels)
        return method(self, *args, **kwargs)
    return wrapper
def check_initialize_done(method):
    """Wrapper method to check that initialize has been called on DbgServices
    before any other method.

    Raises:
        RuntimeError: if the instance's `initialized` flag is not set.
    """
    @wraps(method)
    def new_method(self, *args, **kwargs):
        if not self.initialized:
            # BUG FIX: the user-facing message previously read "Inilize".
            raise RuntimeError("Initialize should be called before any other methods of DbgServices!")
        return method(self, *args, **kwargs)
    return new_method
def check_tensor_info_init(method):
    """Decorator validating the TensorInfo constructor arguments."""
    @wraps(method)
    def wrapper(self, *args, **kwargs):
        (node_name, slot, iteration, device_id, root_graph_id,
         is_parameter), _ = parse_user_args(method, *args, **kwargs)
        type_check(node_name, (str,), "node_name")
        # All numeric identifiers must fit in an unsigned 32-bit range.
        for label, value in (("slot", slot), ("iteration", iteration),
                             ("device_id", device_id), ("root_graph_id", root_graph_id)):
            check_uint32(value, label)
        type_check(is_parameter, (bool,), "is_parameter")
        return method(self, *args, **kwargs)
    return wrapper
def check_tensor_data_init(method):
    """Wrapper method to check the parameters of DbgServices TensorData init."""

    @wraps(method)
    def checked(self, *args, **kwargs):
        (data_ptr, data_size, dtype, shape), _ = parse_user_args(method, *args, **kwargs)
        type_check(data_ptr, (bytes,), "data_ptr")
        check_uint64(data_size, "data_size")
        type_check(dtype, (int,), "dtype")
        labels = ["shape_" + str(idx) for idx in range(len(shape))]
        type_check_list(shape, (int,), labels)
        # The declared size must match the actual byte-buffer length.
        if len(data_ptr) != data_size:
            raise ValueError("data_ptr length ({0}) is not equal to data_size ({1}).".format(len(data_ptr), data_size))
        return method(self, *args, **kwargs)

    return checked
def check_watchpoint_hit_init(method):
    """Wrapper method to check the parameters of DbgServices WatchpointHit init."""

    @wraps(method)
    def checked(self, *args, **kwargs):
        parsed, _ = parse_user_args(method, *args, **kwargs)
        (name, slot, condition, watchpoint_id,
         parameters, error_code, device_id, root_graph_id) = parsed
        type_check(name, (str,), "name")
        check_uint32(slot, "slot")
        type_check(condition, (int,), "condition")
        check_uint32(watchpoint_id, "watchpoint_id")
        # parameters must be a list of cds.Parameter instances.
        labels = ["param_" + str(idx) for idx in range(len(parameters))]
        type_check_list(parameters, (cds.Parameter,), labels)
        type_check(error_code, (int,), "error_code")
        check_uint32(device_id, "device_id")
        check_uint32(root_graph_id, "root_graph_id")
        return method(self, *args, **kwargs)

    return checked
def check_parameter_init(method):
    """Wrapper method to check the parameters of DbgServices Parameter init."""

    @wraps(method)
    def checked(self, *args, **kwargs):
        (name, disabled, value, hit, actual_value), _ = parse_user_args(method, *args, **kwargs)
        # Each field is validated against its expected Python type, in order.
        for arg, expected, label in ((name, (str,), "name"),
                                     (disabled, (bool,), "disabled"),
                                     (value, (float,), "value"),
                                     (hit, (bool,), "hit"),
                                     (actual_value, (float,), "actual_value")):
            type_check(arg, expected, label)
        return method(self, *args, **kwargs)

    return checked