| @@ -24,7 +24,7 @@ usage() | |||
| { | |||
| echo "Usage:" | |||
| echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" | |||
| echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | |||
| echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | |||
| echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]" | |||
| echo "" | |||
| echo "Options:" | |||
| @@ -48,6 +48,7 @@ usage() | |||
| echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" | |||
| echo " -Q Enable dump memory, default off" | |||
| echo " -D Enable dumping of function graph ir, default on" | |||
| echo " -S Enable async data dump, default off" | |||
| echo " -z Compile dataset & mindrecord, default on" | |||
| echo " -M Enable MPI and NCCL for GPU training, gpu default on" | |||
| echo " -V Specify the minimum required cuda version, default CUDA 10.1" | |||
| @@ -88,6 +89,7 @@ checkopts() | |||
| ENABLE_TIMELINE="off" | |||
| ENABLE_DUMP2PROTO="on" | |||
| ENABLE_DUMPE2E="off" | |||
| ENABLE_DATA_DUMP="off" | |||
| ENABLE_DUMP_IR="on" | |||
| COMPILE_MINDDATA="on" | |||
| ENABLE_MPI="off" | |||
| @@ -102,7 +104,7 @@ checkopts() | |||
| ENABLE_PYTHON="on" | |||
| # Process the options | |||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt | |||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt | |||
| do | |||
| OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') | |||
| case "${opt}" in | |||
| @@ -218,6 +220,11 @@ checkopts() | |||
| ENABLE_DUMPE2E="$OPTARG" | |||
| echo "enable dump end to end" | |||
| ;; | |||
| S) | |||
| check_on_off $OPTARG S | |||
| ENABLE_DATA_DUMP="$OPTARG" | |||
| echo "enable data dump" | |||
| ;; | |||
| D) | |||
| check_on_off $OPTARG D | |||
| ENABLE_DUMP_IR="$OPTARG" | |||
| @@ -321,6 +328,9 @@ build_mindspore() | |||
| if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" | |||
| fi | |||
| if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON" | |||
| fi | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" | |||
| if [[ "X$ENABLE_MPI" = "Xon" ]]; then | |||
| @@ -116,6 +116,10 @@ if(ENABLE_DUMP_E2E) | |||
| add_compile_definitions(ENABLE_DUMP_E2E) | |||
| endif() | |||
| if(ENABLE_DATA_DUMP) | |||
| add_compile_definitions(ENABLE_DATA_DUMP) | |||
| endif() | |||
| if(ENABLE_DEBUGGER) | |||
| add_compile_definitions(ENABLE_DEBUGGER) | |||
| endif() | |||
| @@ -0,0 +1,15 @@ | |||
| { | |||
| "DumpSettings": { | |||
| "net_name": "ResNet50", | |||
| "mode": 1, | |||
| "iteration": 0, | |||
| "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] | |||
| }, | |||
| "DumpSettingsSpec": { | |||
| "net_name": "net name eg:ResNet50", | |||
| "mode": "0: dump all kernels, 1: dump kernels in kernels list", | |||
| "iteration": "specified iteration", | |||
| "kernels": "op's full scope name which needs to be dumped" | |||
| } | |||
| } | |||
| @@ -1 +1 @@ | |||
| Subproject commit 1c2672868fda8b1d012c99e5aca73725ac869ba9 | |||
| Subproject commit 18cf690152add623ffbddfbbb4674d1b34484ca7 | |||
| @@ -109,8 +109,12 @@ if (ENABLE_D) | |||
| file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto") | |||
| ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER}) | |||
| file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/dump/proto/*.proto") | |||
| ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP}) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS}) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS}) | |||
| list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS}) | |||
| add_compile_definitions(ENABLE_D) | |||
| endif () | |||
| @@ -19,6 +19,15 @@ if (ENABLE_DEBUGGER) | |||
| ) | |||
| endif (ENABLE_DEBUGGER) | |||
| if (ENABLE_D) | |||
| list(APPEND _DEBUG_SRC_LIST | |||
| "${CMAKE_CURRENT_SOURCE_DIR}/common.cc" | |||
| ) | |||
| if (ENABLE_DATA_DUMP) | |||
| list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc") | |||
| endif(ENABLE_DATA_DUMP) | |||
| endif() | |||
| if (ENABLE_DUMP_E2E) | |||
| list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc") | |||
| endif (ENABLE_DUMP_E2E) | |||
| @@ -0,0 +1,125 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "debug/common.h" | |||
| #include <memory> | |||
| #include <optional> | |||
| #include "utils/system/env.h" | |||
| #include "utils/system/file_system.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/context/ms_context.h" | |||
| namespace mindspore { | |||
| std::optional<std::string> Common::GetRealPath(const std::string &input_path) { | |||
| std::string out_path; | |||
| auto path_split_pos = input_path.find_last_of('/'); | |||
| if (path_split_pos == std::string::npos) { | |||
| path_split_pos = input_path.find_last_of('\\'); | |||
| } | |||
| // get real path | |||
| char real_path[PATH_MAX] = {0}; | |||
| if (path_split_pos != std::string::npos) { | |||
| std::string prefix_path = input_path.substr(0, path_split_pos); | |||
| if (prefix_path.length() >= PATH_MAX) { | |||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||
| return std::nullopt; | |||
| } | |||
| std::string last_path = input_path.substr(path_split_pos, input_path.length() - path_split_pos); | |||
| auto ret = CreateNotExistDirs(prefix_path); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; | |||
| return std::nullopt; | |||
| } | |||
| if (nullptr == realpath(prefix_path.c_str(), real_path)) { | |||
| MS_LOG(ERROR) << "dir " << prefix_path << " does not exit."; | |||
| return std::nullopt; | |||
| } | |||
| out_path = std::string(real_path) + last_path; | |||
| } | |||
| if (path_split_pos == std::string::npos) { | |||
| if (input_path.length() >= PATH_MAX) { | |||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||
| return std::nullopt; | |||
| } | |||
| if (nullptr == realpath(input_path.c_str(), real_path)) { | |||
| MS_LOG(ERROR) << "File " << input_path << " does not exit, it will be created."; | |||
| } | |||
| out_path = std::string(real_path); | |||
| } | |||
| return out_path; | |||
| } | |||
| bool Common::CreateNotExistDirs(const std::string &path) { | |||
| std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem(); | |||
| MS_EXCEPTION_IF_NULL(fs); | |||
| char temp_path[PATH_MAX] = {0}; | |||
| if (path.length() > PATH_MAX) { | |||
| MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; | |||
| return false; | |||
| } | |||
| for (uint32_t i = 0; i < path.length(); i++) { | |||
| temp_path[i] = path[i]; | |||
| if (temp_path[i] == '\\' || temp_path[i] == '/') { | |||
| if (i != 0) { | |||
| char tmp_char = temp_path[i]; | |||
| temp_path[i] = '\0'; | |||
| std::string path_handle(temp_path); | |||
| if (!fs->FileExist(temp_path)) { | |||
| MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; | |||
| if (!fs->CreateDir(temp_path)) { | |||
| MS_LOG(ERROR) << "Create " << path_handle << " dir error"; | |||
| return false; | |||
| } | |||
| } | |||
| temp_path[i] = tmp_char; | |||
| } | |||
| } | |||
| } | |||
| if (!fs->FileExist(path)) { | |||
| MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; | |||
| if (!fs->CreateDir(path)) { | |||
| MS_LOG(ERROR) << "Create " << path << " dir error"; | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| std::optional<std::string> Common::GetConfigFile(const std::string &env) { | |||
| if (env.empty()) { | |||
| MS_LOG(EXCEPTION) << "Invalid env"; | |||
| } | |||
| auto config_path_str = std::getenv(env.c_str()); | |||
| if (config_path_str == nullptr) { | |||
| MS_LOG(ERROR) << "Please export env:" << env; | |||
| return {}; | |||
| } | |||
| MS_LOG(INFO) << "Async Dump Getenv env:" << env << "=" << config_path_str; | |||
| std::string dump_config_file(config_path_str); | |||
| std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem(); | |||
| MS_EXCEPTION_IF_NULL(fs); | |||
| if (!fs->FileExist(dump_config_file)) { | |||
| MS_LOG(ERROR) << dump_config_file << " not exist."; | |||
| return {}; | |||
| } | |||
| return dump_config_file; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,36 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ | |||
| #include <string> | |||
| #include <optional> | |||
| #include "utils/contract.h" | |||
| namespace mindspore { | |||
// Static filesystem helpers shared by the debug/dump code: path resolution,
// recursive directory creation, and env-var-based config-file lookup.
class Common {
 public:
  Common() = default;
  ~Common() = default;
  // Resolve input_path to an absolute path, creating missing parent
  // directories along the way; returns std::nullopt on failure.
  static std::optional<std::string> GetRealPath(const std::string &input_path);
  // Return the file path stored in environment variable `env` after checking
  // the file exists; returns an empty optional otherwise.
  static std::optional<std::string> GetConfigFile(const std::string &env);
 private:
  // Create every directory component of `path` that does not exist yet.
  static bool CreateNotExistDirs(const std::string &path);
};
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ | |||
| @@ -0,0 +1,152 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "debug/data_dump_parser.h" | |||
| #include <fstream> | |||
| #include "utils/context/ms_context.h" | |||
| #include "debug/common.h" | |||
// Names of the environment variables controlling async data dump.
// NOTE(review): "Ptah" in kDataDumpConfigPtah looks like a typo for "Path";
// the identifier is internal only and the env-var string itself is correct —
// renaming would require touching every use site.
constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
| namespace mindspore { | |||
// Restore every cached dump setting to its default (disabled/empty) state.
// Called from ParseDumpConfig while lock_ is held.
void DataDumpParser::ResetParam() {
  enable_ = false;
  net_name_.clear();
  dump_mode_ = 0;
  dump_step_ = 0;
  kernel_set_.clear();
}
| bool DataDumpParser::DumpEnabled() const { | |||
| auto enable_dump = std::getenv(kEnableDataDump); | |||
| if (!enable_dump) { | |||
| MS_LOG(WARNING) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP"; | |||
| return false; | |||
| } | |||
| auto enabled = std::atoi(enable_dump); | |||
| if (enabled != 1) { | |||
| MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1"; | |||
| return false; | |||
| } | |||
| auto context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| if (context->execution_mode() == kPynativeMode) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump"; | |||
| } | |||
| return true; | |||
| } | |||
| std::optional<std::string> DataDumpParser::GetDumpPath() const { | |||
| auto dump_path = std::getenv(kDataDumpPath); | |||
| if (!dump_path) { | |||
| MS_LOG(ERROR) << "[DataDump] dump path is null. Please export DATA_DUMP_PATH"; | |||
| return {}; | |||
| } | |||
| std::string dump_path_str(dump_path); | |||
| return dump_path_str; | |||
| } | |||
| void DataDumpParser::ParseDumpConfig() { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| MS_LOG(INFO) << "[DataDump] parse start"; | |||
| if (!DumpEnabled()) { | |||
| MS_LOG(INFO) << "[DataDump] dump not enable"; | |||
| return; | |||
| } | |||
| ResetParam(); | |||
| auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah); | |||
| if (!dump_config_file.has_value()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Get config file failed"; | |||
| } | |||
| std::ifstream json_file(dump_config_file.value()); | |||
| if (!json_file.is_open()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed."; | |||
| } | |||
| nlohmann::json j; | |||
| json_file >> j; | |||
| if (j.find("DumpSettings") == j.end()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist."; | |||
| } | |||
| nlohmann::json dump_settings = j.at("DumpSettings"); | |||
| // convert json to string | |||
| std::stringstream ss; | |||
| ss << dump_settings; | |||
| std::string cfg = ss.str(); | |||
| MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg; | |||
| if (!IsConfigExist(dump_settings)) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid"; | |||
| } | |||
| if (!ParseDumpSetting(dump_settings)) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed"; | |||
| } | |||
| } | |||
| bool DataDumpParser::NeedDump(const std::string &op_full_name) const { | |||
| if (!DumpEnabled()) { | |||
| return false; | |||
| } | |||
| if (dump_mode_ == 0) { | |||
| return true; | |||
| } | |||
| auto iter = kernel_set_.find(op_full_name); | |||
| return iter != kernel_set_.end(); | |||
| } | |||
| bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const { | |||
| if (dump_settings.find("mode") == dump_settings.end() || dump_settings.find("net_name") == dump_settings.end() || | |||
| dump_settings.find("iteration") == dump_settings.end() || dump_settings.find("kernels") == dump_settings.end()) { | |||
| MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) { | |||
| auto mode = dump_settings.at("mode"); | |||
| auto net_name = dump_settings.at("net_name"); | |||
| auto iteration = dump_settings.at("iteration"); | |||
| auto kernels = dump_settings.at("kernels"); | |||
| if (!(mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array())) { | |||
| MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid."; | |||
| enable_ = false; | |||
| return false; | |||
| } | |||
| enable_ = true; | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| dump_mode_ = mode; | |||
| net_name_ = net_name; | |||
| dump_step_ = iteration; | |||
| for (const auto &kernel : kernels) { | |||
| auto kernel_str = kernel.dump(); | |||
| kernel_str.erase(std::remove(kernel_str.begin(), kernel_str.end(), '\"'), kernel_str.end()); | |||
| MS_LOG(INFO) << "[DataDump] Need dump kernel:" << kernel_str; | |||
| kernel_set_.insert(kernel_str); | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,61 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||
| #include <string> | |||
| #include <set> | |||
| #include <mutex> | |||
| #include <optional> | |||
| #include "nlohmann/json.hpp" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
// Singleton that parses the async data-dump JSON configuration and answers
// per-kernel "should this op be dumped?" queries.
class DataDumpParser {
 public:
  // Meyers-singleton accessor.
  static DataDumpParser &GetInstance() {
    static DataDumpParser instance;
    return instance;
  }
  // Load and validate the JSON config (thread-safe; no-op when dump disabled).
  void ParseDumpConfig();
  // True when `op_full_name` should be dumped under the current settings.
  bool NeedDump(const std::string &op_full_name) const;
  // True when ENABLE_DATA_DUMP=1 is exported; throws in PyNative mode.
  bool DumpEnabled() const;
  // Dump output directory from DATA_DUMP_PATH, or an empty optional if unset.
  std::optional<std::string> GetDumpPath() const;
  bool enable() const { return enable_; }
  const std::string &net_name() const { return net_name_; }
  uint32_t dump_mode() const { return dump_mode_; }
  uint32_t dump_step() const { return dump_step_; }
  const std::set<std::string> &kernel_set() const { return kernel_set_; }
 private:
  DataDumpParser() = default;
  virtual ~DataDumpParser() = default;
  DISABLE_COPY_AND_ASSIGN(DataDumpParser);
  // Restore all cached settings to their defaults before re-parsing.
  void ResetParam();
  // Check that all required keys exist in the DumpSettings object.
  bool IsConfigExist(const nlohmann::json &dump_settings) const;
  // Extract and cache the settings fields; false on a type mismatch.
  bool ParseDumpSetting(const nlohmann::json &dump_settings);
  std::mutex lock_;  // guards ParseDumpConfig
  bool enable_{false};  // config parsed successfully and dump requested
  std::string net_name_;  // network name from the config
  uint32_t dump_mode_{0};  // 0: dump all kernels, 1: only kernels in kernel_set_
  uint32_t dump_step_{0};  // iteration to dump
  std::set<std::string> kernel_set_;  // full-scope names of kernels to dump
};
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||
| @@ -17,12 +17,14 @@ | |||
| #include <limits.h> | |||
| #include <fstream> | |||
| #include <string> | |||
| #include <optional> | |||
| #include <nlohmann/json.hpp> | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/system/file_system.h" | |||
| #include "utils/system/env.h" | |||
| #include "utils/convert_utils.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "debug/common.h" | |||
| using json = nlohmann::json; | |||
| @@ -158,100 +160,19 @@ bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len) | |||
| return false; | |||
| } | |||
| std::string realpath; | |||
| bool ret = GetRealPath(filename, &realpath); | |||
| if (!ret) { | |||
| auto realpath = Common::GetRealPath(filename); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Get real path failed."; | |||
| return false; | |||
| } | |||
| std::ofstream fd; | |||
| fd.open(realpath, std::ios::binary | std::ios::out); | |||
| fd.open(realpath.value(), std::ios::binary | std::ios::out); | |||
| if (!fd.is_open()) { | |||
| MS_LOG(ERROR) << "Open file " << realpath << " fail."; | |||
| MS_LOG(ERROR) << "Open file " << realpath.value() << " fail."; | |||
| return false; | |||
| } | |||
| (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len)); | |||
| fd.close(); | |||
| return true; | |||
| } | |||
| bool Dump::GetRealPath(const std::string &inpath, std::string *outpath) { | |||
| MS_EXCEPTION_IF_NULL(outpath); | |||
| auto path_split_pos = inpath.find_last_of('/'); | |||
| if (path_split_pos == std::string::npos) { | |||
| path_split_pos = inpath.find_last_of('\\'); | |||
| } | |||
| // get real path | |||
| char real_path[PATH_MAX] = {0}; | |||
| if (path_split_pos != std::string::npos) { | |||
| std::string prefix_path = inpath.substr(0, path_split_pos); | |||
| if (prefix_path.length() >= PATH_MAX) { | |||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||
| return false; | |||
| } | |||
| std::string last_path = inpath.substr(path_split_pos, inpath.length() - path_split_pos); | |||
| auto ret = CreateNotExistDirs(prefix_path); | |||
| if (ret == false) { | |||
| MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; | |||
| return false; | |||
| } | |||
| if (nullptr == realpath(prefix_path.c_str(), real_path)) { | |||
| MS_LOG(ERROR) << "dir " << prefix_path << " does not exit."; | |||
| return false; | |||
| } | |||
| *outpath = std::string(real_path) + last_path; | |||
| } | |||
| if (path_split_pos == std::string::npos) { | |||
| if (inpath.length() >= PATH_MAX) { | |||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||
| return false; | |||
| } | |||
| if (nullptr == realpath(inpath.c_str(), real_path)) { | |||
| MS_LOG(ERROR) << "File " << inpath << " does not exit, it will be created."; | |||
| } | |||
| *outpath = std::string(real_path); | |||
| } | |||
| return true; | |||
| } | |||
| bool Dump::CreateNotExistDirs(const std::string &path) { | |||
| std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem(); | |||
| MS_EXCEPTION_IF_NULL(fs); | |||
| char temp_path[PATH_MAX] = {0}; | |||
| if (path.length() > PATH_MAX) { | |||
| MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; | |||
| return false; | |||
| } | |||
| for (uint32_t i = 0; i < path.length(); i++) { | |||
| temp_path[i] = path[i]; | |||
| if (temp_path[i] == '\\' || temp_path[i] == '/') { | |||
| if (i != 0) { | |||
| char tmp_char = temp_path[i]; | |||
| temp_path[i] = '\0'; | |||
| std::string path_handle(temp_path); | |||
| if (!fs->FileExist(temp_path)) { | |||
| MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; | |||
| if (!fs->CreateDir(temp_path)) { | |||
| MS_LOG(ERROR) << "Create " << path_handle << " dir error"; | |||
| return false; | |||
| } | |||
| } | |||
| temp_path[i] = tmp_char; | |||
| } | |||
| } | |||
| } | |||
| if (!fs->FileExist(path)) { | |||
| MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; | |||
| if (!fs->CreateDir(path)) { | |||
| MS_LOG(ERROR) << "Create " << path << " dir error"; | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -59,10 +59,6 @@ class Dump { | |||
| uint32_t cur_iter_; | |||
| std::vector<std::string> dump_kernels_; | |||
| static bool GetRealPath(const std::string &inpath, std::string *outpath); | |||
| static bool CreateNotExistDirs(const std::string &path); | |||
| private: | |||
| bool ParseDumpConfig(const std::string &dump_config_file); | |||
| bool IsConfigExist(const nlohmann::json &dumpSettings); | |||
| @@ -42,6 +42,7 @@ | |||
| #include "device/ascend/ascend_memory_manager.h" | |||
| #include "debug/tensor_load.h" | |||
| using ge::model_runner::ModelRunner; | |||
| using mindspore::device::ascend::ProfilingManager; | |||
| using mindspore::device::ascend::ProfilingUtils; | |||
| using mindspore::device::ascend::tasksink::TaskGenerator; | |||
| @@ -90,9 +91,16 @@ std::string GetRankId() { | |||
| AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } | |||
| void AscendKernelRuntime::ClearGraphModelMap() { | |||
| #ifdef ENABLE_DATA_DUMP | |||
| for (auto &iter : graph_data_dumper_) { | |||
| MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first; | |||
| iter.second->UnloadDumpInfo(); | |||
| } | |||
| graph_data_dumper_.clear(); | |||
| #endif | |||
| for (auto &iter : graph_model_map_) { | |||
| MS_LOG(INFO) << "Ge UnloadModel " << iter.first; | |||
| auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter.first); | |||
| auto ret = ModelRunner::Instance().UnloadModel(iter.first); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "UnloadModel failed"; | |||
| } | |||
| @@ -107,7 +115,7 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { | |||
| return; | |||
| } | |||
| MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first; | |||
| auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter->first); | |||
| auto ret = ModelRunner::Instance().UnloadModel(iter->first); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "UnloadModel failed"; | |||
| } | |||
| @@ -159,6 +167,10 @@ bool AscendKernelRuntime::Init() { | |||
| } | |||
| #endif | |||
| #ifdef ENABLE_DATA_DUMP | |||
| DataDumpParser::GetInstance().ParseDumpConfig(); | |||
| #endif | |||
| // Start up profiling before rtSetDevice | |||
| ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); | |||
| if (!ret) { | |||
| @@ -440,7 +452,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { | |||
| << ", wait_active_stream_list size:" << wait_active_stream_list.size() | |||
| << ", force_copy_stream_list size:" << force_copy_stream_list.size(); | |||
| std::vector<std::shared_ptr<ge::model_runner::OpInfo>> empty_list; | |||
| std::shared_ptr<ge::model_runner::DavinciModel> model = std::make_shared<ge::model_runner::DavinciModel>( | |||
| auto model = std::make_shared<ge::model_runner::DavinciModel>( | |||
| task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0, | |||
| 0, 0, 0, 0, 0, resource_manager.get_cur_stream_num(), label_assign_instance.GetLabelNum(NOT_NULL(graph)), | |||
| resource_manager.get_cur_event_num(), 0); | |||
| @@ -477,21 +489,45 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { | |||
| std::shared_ptr<ge::ModelListener> listener; | |||
| MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first; | |||
| bool status = ge::model_runner::ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, | |||
| model_iter->second, listener); | |||
| bool status = | |||
| ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener); | |||
| if (!status) { | |||
| MS_LOG(EXCEPTION) << "Load Task Failed"; | |||
| } | |||
| if (ProfilingManager::GetInstance().IsProfiling()) { | |||
| auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first); | |||
| auto stream_ids = ge::model_runner::ModelRunner::Instance().GetStreamIdList(model_iter->first); | |||
| auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first); | |||
| auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first); | |||
| ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph)); | |||
| } | |||
| #ifdef ENABLE_DATA_DUMP | |||
| LaunchDataDump(NOT_NULL(graph)); | |||
| #endif | |||
| if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) { | |||
| MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed"; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| #ifdef ENABLE_DATA_DUMP | |||
| void AscendKernelRuntime::LaunchDataDump(NotNull<const session::KernelGraph *> graph) { | |||
| if (!DataDumpParser::GetInstance().DumpEnabled()) { | |||
| return; | |||
| } | |||
| auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph->graph_id()); | |||
| auto data_dumper = std::make_shared<DataDumper>(graph.get(), runtime_info_map); | |||
| MS_EXCEPTION_IF_NULL(data_dumper); | |||
| data_dumper->LoadDumpInfo(); | |||
| auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper); | |||
| if (!ret.second) { | |||
| MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed"; | |||
| } | |||
| } | |||
| #endif | |||
| void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) { | |||
| auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id); | |||
| auto task_ids = ModelRunner::Instance().GetTaskIdList(graph_id); | |||
| auto graph_task_names = ProfilingUtils::graph_kernel_name(); | |||
| auto iter = graph_task_names.find(graph_id); | |||
| if (iter != graph_task_names.end()) { | |||
| @@ -524,7 +560,7 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) { | |||
| return false; | |||
| } | |||
| bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); | |||
| bool status = ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); | |||
| if (!status) { | |||
| MS_LOG(ERROR) << "Run task failed"; | |||
| DebugTaskIdName(graph->graph_id()); | |||
| @@ -24,6 +24,10 @@ | |||
| #include "framework/ge_runtime/davinci_model.h" | |||
| #include "device/kernel_runtime_manager.h" | |||
| #include "session/session_basic.h" | |||
| #ifdef ENABLE_DATA_DUMP | |||
| #include "debug/data_dump_parser.h" | |||
| #include "device/ascend/dump/data_dumper.h" | |||
| #endif | |||
| using ge::model_runner::TaskInfo; | |||
| using std::unordered_map; | |||
| @@ -66,6 +70,10 @@ class AscendKernelRuntime : public KernelRuntime { | |||
| bool initialized_{false}; | |||
| unordered_map<GraphId, vector<std::shared_ptr<TaskInfo>>> task_map_; | |||
| unordered_map<GraphId, std::shared_ptr<ge::model_runner::DavinciModel>> graph_model_map_; | |||
| #ifdef ENABLE_DATA_DUMP | |||
| void LaunchDataDump(NotNull<const session::KernelGraph *> graph); | |||
| unordered_map<GraphId, std::shared_ptr<DataDumper>> graph_data_dumper_; | |||
| #endif | |||
| }; | |||
| MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime); | |||
| @@ -0,0 +1,282 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifdef ENABLE_DATA_DUMP | |||
| #include "device/ascend/dump/data_dumper.h" | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "utility" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "runtime/mem.h" | |||
| #include "runtime/kernel.h" | |||
| #include "device/ascend/dump/ge_dump.h" | |||
| #include "proto/op_mapping_info.pb.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "debug/data_dump_parser.h" | |||
| constexpr uint32_t kAicpuLoadFlag = 1; | |||
| constexpr uint32_t kAicpuUnloadFlag = 0; | |||
| constexpr uint32_t kTupleTaskId = 0; | |||
| constexpr uint32_t kTupleStreamId = 1; | |||
| constexpr uint32_t kTupleArgs = 2; | |||
| constexpr uint32_t kCurrentStepTensorIndex = 0; | |||
| constexpr uint32_t kCurrentEpochTensorIndex = 1; | |||
| constexpr uint32_t kStepsPerEpochTensorIndex = 2; | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task); | |||
| void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task); | |||
| void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr); | |||
// Release the device memory that held the load/unload op-mapping blobs.
DataDumper::~DataDumper() {
  ReleaseDevMem(&dev_load_mem_);
  ReleaseDevMem(&dev_unload_mem_);
}
// Build an aicpu OpMappingInfo describing every kernel of the graph that
// should be dumped, then push it to the device via RtLoadDumpData and mark
// the dumper as loaded.
void DataDumper::LoadDumpInfo() {
  MS_LOG(INFO) << "[DataDump] LoadDumpInfo start";
  MS_EXCEPTION_IF_NULL(kernel_graph_);
  aicpu::dump::OpMappingInfo dump_info;
  SetOpMappingInfo(NOT_NULL(&dump_info));
  auto kernels = kernel_graph_->execution_order();
  for (const auto &kernel : kernels) {
    MS_EXCEPTION_IF_NULL(kernel);
    if (!KernelNeedDump(kernel)) {
      continue;
    }
    MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope();
    // Record the registered kernel names for later use by the dumper.
    dump_kernel_names_.emplace_back(kernel->fullname_with_scope());
    // One dump task per kernel, appended to the op-mapping message.
    aicpu::dump::Task task;
    ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task));
    MS_EXCEPTION_IF_NULL(dump_info.mutable_task());
    dump_info.mutable_task()->Add(std::move(task));
  }
  RtLoadDumpData(dump_info, &dev_load_mem_);
  load_flag_ = true;
  MS_LOG(INFO) << "[DataDump] LoadDumpInfo end";
}
| void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_); | |||
| auto dump_path = DataDumpParser::GetInstance().GetDumpPath(); | |||
| if (!dump_path.has_value()) { | |||
| MS_LOG(EXCEPTION) << "Dump path invalid"; | |||
| } | |||
| auto device_id = context_ptr->device_id(); | |||
| dump_info->set_dump_path(dump_path.value() + "_" + std::to_string(device_id) + "/"); | |||
| MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path.value(); | |||
| dump_info->set_model_name(DataDumpParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id())); | |||
| dump_info->set_dump_step(std::to_string(DataDumpParser::GetInstance().dump_step())); | |||
| dump_info->set_model_id(kernel_graph_->graph_id()); | |||
| dump_info->set_flag(kAicpuLoadFlag); | |||
| const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors(); | |||
| if (input_ctrl_tensors == nullptr || input_ctrl_tensors->size() < 3) { | |||
| MS_LOG(INFO) << "[DataDump] Not data sink mode, input_ctrl_tensor"; | |||
| return; | |||
| } | |||
| const auto ¤t_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex); | |||
| const auto &currnet_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex); | |||
| const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex); | |||
| MS_EXCEPTION_IF_NULL(current_step_tensor); | |||
| MS_EXCEPTION_IF_NULL(currnet_epoch_tensor); | |||
| MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor); | |||
| MS_EXCEPTION_IF_NULL(current_step_tensor->device_address()); | |||
| MS_EXCEPTION_IF_NULL(currnet_epoch_tensor->device_address()); | |||
| MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address()); | |||
| void *current_step = current_step_tensor->device_address()->ptr_; | |||
| void *current_epoch = currnet_epoch_tensor->device_address()->ptr_; | |||
| void *steps_per_epoch = steps_per_epoch_tensor->device_address()->ptr_; | |||
| if (current_epoch != nullptr && current_step != nullptr && steps_per_epoch != nullptr) { | |||
| dump_info->set_step_id_addr(reinterpret_cast<uint64_t>(current_epoch)); | |||
| dump_info->set_loop_cond_addr(reinterpret_cast<uint64_t>(current_step)); | |||
| dump_info->set_iterations_per_loop_addr(reinterpret_cast<uint64_t>(steps_per_epoch)); | |||
| } else { | |||
| MS_LOG(INFO) << "Invalid ctrl tensor device address"; | |||
| } | |||
| } | |||
| bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const { | |||
| if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL && | |||
| AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) { | |||
| return false; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| const auto &kernel_set = DataDumpParser::GetInstance().kernel_set(); | |||
| return kernel_set.find(kernel->fullname_with_scope()) != kernel_set.end(); | |||
| } | |||
| void DataDumper::UnloadDumpInfo() { | |||
| if (!load_flag_) { | |||
| MS_LOG(WARNING) << "Load not success, no need to unload"; | |||
| return; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_); | |||
| MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << kernel_graph_->graph_id(); | |||
| aicpu::dump::OpMappingInfo op_mapping_info; | |||
| op_mapping_info.set_model_id(kernel_graph_->graph_id()); | |||
| op_mapping_info.set_flag(kAicpuUnloadFlag); | |||
| for (const auto &kernel_name : dump_kernel_names_) { | |||
| aicpu::dump::Task task; | |||
| auto iter = runtime_info_map_.find(kernel_name); | |||
| if (iter == runtime_info_map_.end()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(iter->second); | |||
| auto task_id = std::get<kTupleTaskId>(*iter->second); | |||
| task.set_task_id(task_id); | |||
| MS_EXCEPTION_IF_NULL(op_mapping_info.mutable_task()); | |||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||
| } | |||
| RtLoadDumpData(op_mapping_info, &dev_unload_mem_); | |||
| } | |||
| void DataDumper::ReleaseDevMem(void **ptr) const { | |||
| if (ptr == nullptr) { | |||
| return; | |||
| } | |||
| if (*ptr != nullptr) { | |||
| rtError_t rt_error = rtFree(*ptr); | |||
| if (rt_error != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << rt_error; | |||
| } | |||
| *ptr = nullptr; | |||
| } | |||
| } | |||
| void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const { | |||
| dump_task->set_end_graph(false); | |||
| auto iter = runtime_info_map_.find(kernel->fullname_with_scope()); | |||
| if (iter == runtime_info_map_.end()) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(iter->second); | |||
| auto task_id = std::get<kTupleTaskId>(*iter->second); | |||
| auto stream_id = std::get<kTupleStreamId>(*iter->second); | |||
| auto args = std::get<kTupleArgs>(*iter->second); | |||
| MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id; | |||
| dump_task->set_task_id(task_id); | |||
| dump_task->set_stream_id(stream_id); | |||
| MS_EXCEPTION_IF_NULL(dump_task->mutable_op()); | |||
| dump_task->mutable_op()->set_op_name(kernel->fullname_with_scope()); | |||
| dump_task->mutable_op()->set_op_type(AnfAlgo::GetCNodeName(kernel.get())); | |||
| DumpKernelOutput(kernel, args, dump_task); | |||
| DumpKernelInput(kernel, args, dump_task); | |||
| } | |||
| void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) { | |||
| std::string proto_str; | |||
| size_t proto_size = dump_info.ByteSizeLong(); | |||
| bool ret = dump_info.SerializeToString(&proto_str); | |||
| if (!ret || proto_size == 0) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size %zu."; | |||
| } | |||
| rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed"; | |||
| } | |||
| if (ptr == nullptr) { | |||
| MS_LOG(ERROR) << "[DataDump] rtMalloc failed, ptr is nullptr"; | |||
| return; | |||
| } | |||
| rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed"; | |||
| } | |||
| MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start"; | |||
| rt_ret = rtDatadumpInfoLoad(*ptr, proto_size); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed"; | |||
| } | |||
| } | |||
| void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) { | |||
| MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope(); | |||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(kernel); | |||
| uint64_t offset = sizeof(void *) * input_size; | |||
| for (size_t i = 0; i < output_size; ++i) { | |||
| auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); | |||
| auto output_format = AnfAlgo::GetOutputFormat(kernel, i); | |||
| auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i); | |||
| aicpu::dump::Output output; | |||
| output.set_data_type(GetGeDataType(data_type)); | |||
| output.set_format(GetGeFormat(output_format, output_shape.size())); | |||
| MS_EXCEPTION_IF_NULL(output.mutable_shape()); | |||
| for (auto dim : output_shape) { | |||
| output.mutable_shape()->add_dim(dim); | |||
| } | |||
| output.set_original_output_format(GetGeFormat(output_format, output_shape.size())); | |||
| output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset); | |||
| MS_EXCEPTION_IF_NULL(task->mutable_output()); | |||
| task->mutable_output()->Add(std::move(output)); | |||
| offset += sizeof(void *); | |||
| } | |||
| } | |||
| void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) { | |||
| MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope(); | |||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||
| uint64_t offset = 0; | |||
| for (size_t i = 0; i < input_size; ++i) { | |||
| aicpu::dump::Input input; | |||
| auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); | |||
| auto input_node = input_node_with_index.first; | |||
| auto input_index = input_node_with_index.second; | |||
| std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index); | |||
| auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index); | |||
| if (output_type == kTypeUnknown) { | |||
| MS_LOG(WARNING) << "[DataDump] It is not suggested to use a lonely weight parameter as the output of graph"; | |||
| output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index); | |||
| } | |||
| auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index); | |||
| input.set_data_type(GetGeDataType(output_type)); | |||
| input.set_format(GetGeFormat(output_format, output_shape.size())); | |||
| MS_EXCEPTION_IF_NULL(input.mutable_shape()); | |||
| for (auto dim : output_shape) { | |||
| input.mutable_shape()->add_dim(dim); | |||
| } | |||
| input.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset); | |||
| MS_EXCEPTION_IF_NULL(task->mutable_input()); | |||
| task->mutable_input()->Add(std::move(input)); | |||
| offset += sizeof(void *); | |||
| } | |||
| } | |||
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -0,0 +1,69 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ | |||
| #ifdef ENABLE_DATA_DUMP | |||
| #include <tuple> | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "session/kernel_graph.h" | |||
| namespace aicpu { | |||
| namespace dump { | |||
| class OpMappingInfo; | |||
| class Task; | |||
| } // namespace dump | |||
| } // namespace aicpu | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
// Per-kernel runtime info: tuple(task_id, stream_id, args).
// (The previous comment also listed op_name, but the tuple has only three
// elements; the op name is the key of runtime_info_map_.)
using RuntimeInfo = std::tuple<uint32_t, uint32_t, void *>;
// Loads/unloads the async data-dump configuration (aicpu OpMappingInfo) for a
// single kernel graph onto the Ascend device.
class DataDumper {
 public:
  // |kernel_graph| is borrowed and must outlive this object; the runtime info
  // map is copied (its RuntimeInfo entries remain shared via shared_ptr).
  DataDumper(const session::KernelGraph *kernel_graph,
             const std::map<std::string, std::shared_ptr<RuntimeInfo>> &runtime_info_map)
      : load_flag_(false),
        dev_load_mem_(nullptr),
        dev_unload_mem_(nullptr),
        kernel_graph_(kernel_graph),
        runtime_info_map_(runtime_info_map) {}
  // Frees any device buffers still held for the load/unload protos.
  ~DataDumper();
  // Builds dump info for every kernel selected by the dump config and loads it
  // onto the device; sets load_flag_ afterwards.
  void LoadDumpInfo();
  // Sends an unload request for the previously loaded tasks; warns and returns
  // if LoadDumpInfo never ran.
  void UnloadDumpInfo();

 private:
  void ReleaseDevMem(void **ptr) const;
  bool KernelNeedDump(const CNodePtr &kernel) const;
  void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
  void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const;

  bool load_flag_;       // true once LoadDumpInfo has succeeded
  void *dev_load_mem_;   // device buffer holding the serialized load proto
  void *dev_unload_mem_; // device buffer holding the serialized unload proto
  std::vector<std::string> dump_kernel_names_;  // kernels included in the last load
  const session::KernelGraph *kernel_graph_;    // not owned
  std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map_;
};
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| #endif | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ | |||
| @@ -0,0 +1,120 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ | |||
| #include <map> | |||
| #include <string> | |||
| #include "proto/ge_dtype.pb.h" | |||
| #include "ir/dtype/type_id.h" | |||
| #include "utils/utils.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| static ge::proto::DataType GetGeDataType(TypeId type_id) { | |||
| static const std::map<TypeId, ge::proto::DataType> data_type_map = { | |||
| {TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED}, {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT}, | |||
| {TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16}, {TypeId::kNumberTypeInt8, ge::proto::DT_INT8}, | |||
| {TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8}, {TypeId::kNumberTypeInt16, ge::proto::DT_INT16}, | |||
| {TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16}, {TypeId::kNumberTypeInt32, ge::proto::DT_INT32}, | |||
| {TypeId::kNumberTypeInt64, ge::proto::DT_INT64}, {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32}, | |||
| {TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64}, {TypeId::kNumberTypeBool, ge::proto::DT_BOOL}, | |||
| {TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE}, | |||
| }; | |||
| MS_LOG(INFO) << "Vm origin type_id:" << type_id; | |||
| auto iter = data_type_map.find(type_id); | |||
| if (iter == data_type_map.end()) { | |||
| MS_LOG(EXCEPTION) << "Invalid data type:" << type_id; | |||
| } | |||
| return iter->second; | |||
| } | |||
// Device tensor formats. NOTE(review): the explicit values (=20, =24) and the
// ordering appear to mirror GE's own Format enum so the raw numeric values can
// be written straight into the dump proto — do not reorder or renumber;
// confirm against the GE headers before extending.
enum GeFormat {
  kFormat_NCHW = 0,  // NCHW
  kFormat_NHWC,      // NHWC
  kFormat_ND,        // Nd Tensor
  kFormat_NC1HWC0,   // NC1HWC0
  kFormat_FRACTAL_Z, // FRACTAL_Z
  kFormat_NC1C0HWPAD,
  kFormat_NHWC1C0,
  kFormat_FSR_NCHW,
  kFormat_FRACTAL_DECONV,
  kFormat_C1HWNC0,
  kFormat_FRACTAL_DECONV_TRANSPOSE,
  kFormat_FRACTAL_DECONV_SP_STRIDE_TRANS,
  kFormat_NC1HWC0_C04,    // NC1HWC0, C0 =4
  kFormat_FRACTAL_Z_C04,  // FRACZ, C0 =4
  kFormat_CHWN,
  kFormat_FRACTAL_DECONV_SP_STRIDE8_TRANS,
  kFormat_HWCN,
  kFormat_NC1KHKWHWC0,  // KH,KW kernel h& kernel w maxpooling max output format
  kFormat_BN_WEIGHT,
  kFormat_FILTER_HWCK,  // filter input tensor format
  kFormat_HASHTABLE_LOOKUP_LOOKUPS = 20,
  kFormat_HASHTABLE_LOOKUP_KEYS,
  kFormat_HASHTABLE_LOOKUP_VALUE,
  kFormat_HASHTABLE_LOOKUP_OUTPUT,
  kFormat_HASHTABLE_LOOKUP_HITS = 24,
  kFormat_C1HWNCoC0,
  kFormat_MD,
  kFormat_NDHWC,
  kFormat_FRACTAL_ZZ,
  kFormat_FRACTAL_NZ,
  kFormat_NCDHW,
  kFormat_DHWCN,  // 3D filter input tensor format
  kFormat_NDC1HWC0,
  kFormat_FRACTAL_Z_3D,
  kFormat_CN,
  kFormat_NC,
  kFormat_DHWNC,
  kFormat_FRACTAL_Z_3D_TRANSPOSE,  // 3D filter(transpose) input tensor format
  kFormat_RESERVED,
  kFormat_ALL
};
| static GeFormat GetGeFormat(const std::string &format, size_t shape_size) { | |||
| static const std::map<std::string, GeFormat> format_map = { | |||
| // default format: nchw, fractal_nz? | |||
| {kOpFormat_DEFAULT, kFormat_NCHW}, | |||
| {kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0}, | |||
| {kOpFormat_ND, kFormat_ND}, | |||
| {kOpFormat_NCHW, kFormat_NCHW}, | |||
| {kOpFormat_NHWC, kFormat_NHWC}, | |||
| {kOpFormat_HWCN, kFormat_HWCN}, | |||
| {kOpFormat_NC1HWC0, kFormat_NC1HWC0}, | |||
| {kOpFormat_FRAC_Z, kFormat_FRACTAL_Z}, | |||
| {kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ}, | |||
| {kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0}, | |||
| {kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04}, | |||
| {kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04}, | |||
| {kOpFormat_NDHWC, kFormat_NDHWC}, | |||
| }; | |||
| MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size; | |||
| if (format == kOpFormat_DEFAULT) { | |||
| return shape_size == 4 ? kFormat_NCHW : kFormat_ND; | |||
| } | |||
| auto iter = format_map.find(format); | |||
| if (iter == format_map.end()) { | |||
| MS_LOG(EXCEPTION) << "Invalid format:" << format; | |||
| } | |||
| return iter->second; | |||
| } | |||
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ | |||
| @@ -0,0 +1,49 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| syntax = "proto3"; | |||
| package ge.proto; | |||
// Element data types for dumped tensors. Values are part of the wire format;
// never renumber existing entries.
enum DataType
{
    DT_UNDEFINED = 0;         // Used to indicate a DataType field has not been set.
    DT_FLOAT = 1;             // float type
    DT_FLOAT16 = 2;           // fp16 type
    DT_INT8 = 3;              // int8 type
    DT_UINT8 = 4;             // uint8 type
    DT_INT16 = 5;             // int16 type
    DT_UINT16 = 6;            // uint16 type
    DT_INT32 = 7;             // int32 type
    DT_INT64 = 8;             // int64 type
    DT_UINT32 = 9;            // unsigned int32
    DT_UINT64 = 10;           // unsigned int64
    DT_BOOL = 11;             // bool type
    DT_DOUBLE = 12;           // double type
    DT_STRING = 13;           // string type
    DT_DUAL_SUB_INT8 = 14;    /**< dual output int8 type */
    DT_DUAL_SUB_UINT8 = 15;   /**< dual output uint8 type */
    DT_COMPLEX64 = 16;        // complex64 type
    DT_COMPLEX128 = 17;       // complex128 type
    DT_QINT8 = 18;            // qint8 type
    DT_QINT16 = 19;           // qint16 type
    DT_QINT32 = 20;           // qint32 type
    DT_QUINT8 = 21;           // quint8 type
    DT_QUINT16 = 22;          // quint16 type
    DT_RESOURCE = 23;         // resource type
    DT_STRING_REF = 24;       // string_ref type
    DT_DUAL = 25;             /**< dual output type */
}
| @@ -0,0 +1,78 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| syntax = "proto3"; | |||
| package aicpu.dump; | |||
// Tensor shape: one dim entry per axis, outermost first.
message Shape {
  repeated uint64 dim = 1;
}

// One kernel output to dump: element type/format (numeric DataType/GeFormat
// values), shape, and the device address the data is read from.
message Output {
  int32 data_type = 1;
  int32 format = 2;
  Shape shape = 3;
  uint64 address = 4;
  string original_name = 5;
  int32 original_output_index = 6;
  int32 original_output_data_type = 7;
  int32 original_output_format = 8;
  uint64 size = 9;
};

// One kernel input to dump; fields mirror Output's core fields.
message Input {
  int32 data_type = 1;
  int32 format = 2;
  Shape shape = 3;
  uint64 address = 4;
  uint64 size = 5;
}
// Identity of the kernel being dumped.
message Op {
  string op_name = 1;
  string op_type = 2;
};

// One dump task: binds a runtime (task_id, stream_id) pair to the op and the
// set of inputs/outputs to capture.
message Task {
  uint32 task_id = 1;
  uint32 stream_id = 2;
  Op op = 3;
  repeated Output output = 4;
  bool end_graph = 5;
  repeated Input input = 6;
};
// Top-level message handed to rtDatadumpInfoLoad. The single-field oneof
// wrappers make presence detectable (proto3 scalars have no has_* otherwise).
message OpMappingInfo {
  string dump_path = 1;
  oneof model_name_param {
    string model_name = 2;
  }
  oneof model_id_param {
    uint32 model_id = 3;
  }
  oneof step_id {
    uint64 step_id_addr = 4;               // device address of the step-id value
  }
  oneof iterations_per_loop {
    uint64 iterations_per_loop_addr = 5;   // device address of steps-per-epoch
  }
  oneof loop_cond {
    uint64 loop_cond_addr = 6;             // device address of the loop condition
  }
  uint32 flag = 7;                         // 0x01 load, 0x00 unload
  repeated Task task = 8;
  string dump_step = 9;
};
| @@ -127,6 +127,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i | |||
| AddressPtrList kernel_outputs; | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(anf_node_ptr); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| kernel_mod->set_kernel_name(anf_node_ptr->fullname_with_scope()); | |||
| if (AnfAlgo::GetCNodeName(anf_node_ptr) != kAtomicAddrCleanOpName) { | |||
| for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_node_ptr); ++i) { | |||
| auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i); | |||
| @@ -34,6 +34,7 @@ class CPUKernelRuntime; | |||
| namespace ascend { | |||
| class AscendKernelRuntime; | |||
| class AscendMemoryManager; | |||
| class DataDumper; | |||
| namespace tasksink { | |||
| class TaskGenerator; | |||
| } // namespace tasksink | |||
| @@ -90,6 +91,7 @@ class DeviceAddress { | |||
| friend class mindspore::device::gpu::GPUMemoryManager; | |||
| friend class mindspore::device::ascend::AscendKernelRuntime; | |||
| friend class mindspore::device::ascend::AscendMemoryManager; | |||
| friend class mindspore::device::ascend::DataDumper; | |||
| }; | |||
| using DeviceAddressPtr = std::shared_ptr<DeviceAddress>; | |||
| @@ -34,6 +34,7 @@ | |||
| #include "device/ascend/kernel_select_ascend.h" | |||
| #include "runtime/base.h" | |||
| #include "device/ascend/ascend_stream_assign.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| using device::ascend::ProfilingUtils; | |||
| @@ -117,6 +118,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> | |||
| std::vector<AnfNodePtr> *mute_inputs = kernel_graph_ptr->MutableInputs(); | |||
| MS_EXCEPTION_IF_NULL(mute_inputs); | |||
| mute_inputs->push_back(switch_loop_input[kLoopCountParamName]); | |||
| mute_inputs->push_back(switch_loop_input[kEpochParamName]); | |||
| mute_inputs->push_back(switch_loop_input[kIterLoopParamName]); | |||
| mute_inputs->push_back(switch_loop_input[kZeroParamName]); | |||
| mute_inputs->push_back(switch_loop_input[kOneParamName]); | |||
| @@ -316,6 +318,13 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptr<session::Kerne | |||
| one->set_abstract(paremeter_abstract_ptr); | |||
| ParameterPtr one_new = kernel_graph_ptr->NewParameter(one); | |||
| (*switch_loop_input)[kOneParamName] = one_new; | |||
| ParameterPtr epoch = std::make_shared<Parameter>(kernel_graph_ptr); | |||
| MS_EXCEPTION_IF_NULL(epoch); | |||
| epoch->set_name(kEpochParamName); | |||
| epoch->set_abstract(paremeter_abstract_ptr); | |||
| ParameterPtr epoch_new = kernel_graph_ptr->NewParameter(epoch); | |||
| (*switch_loop_input)[kEpochParamName] = epoch_new; | |||
| } | |||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder KernelAdjust::CreateMngKernelBuilder( | |||
| @@ -510,6 +519,14 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) { | |||
| *val = 0; | |||
| inputs->push_back(loop_count_tensor); | |||
| // Epoch in device | |||
| tensor::TensorPtr epoch_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp); | |||
| MS_EXCEPTION_IF_NULL(epoch_tensor); | |||
| val = static_cast<int32_t *>(epoch_tensor->data_c()); | |||
| MS_EXCEPTION_IF_NULL(val); | |||
| *val = 0; | |||
| inputs->push_back(epoch_tensor); | |||
| tensor::TensorPtr iter_loop_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp); | |||
| MS_EXCEPTION_IF_NULL(iter_loop_tensor); | |||
| val = static_cast<int32_t *>(iter_loop_tensor->data_c()); | |||
| @@ -531,6 +548,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) { | |||
| MS_EXCEPTION_IF_NULL(val); | |||
| *val = 1; | |||
| inputs->push_back(one_tensor); | |||
| MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; | |||
| } | |||
| @@ -37,6 +37,7 @@ constexpr auto kLoopCountParamName = "loop_count"; | |||
| constexpr auto kIterLoopParamName = "iter_loop"; | |||
| constexpr auto kZeroParamName = "zero"; | |||
| constexpr auto kOneParamName = "one"; | |||
| constexpr auto kEpochParamName = "loop_epoch"; | |||
| constexpr auto kStreamNeedActivedFirst = "stream_need_active_first"; | |||
| constexpr uint32_t kSecondStreamSwitchLabel = 2; | |||
| @@ -26,6 +26,7 @@ | |||
| #include "kernel/aicpu/aicpu_kernel_build.h" | |||
| #include "utils/convert_utils.h" | |||
| #include "kernel/aicpu/aicpu_util.h" | |||
| #include "utils/context/ms_context.h" | |||
| using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>; | |||
| @@ -144,8 +145,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> | |||
| if (node_name_ == kTopK) { | |||
| node_name_ = kTopKV2; | |||
| } | |||
| AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>( | |||
| stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs); | |||
| kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump()); | |||
| MS_LOG(INFO) << "AicpuOpKernelMod GenTask end"; | |||
| return {task_info_ptr}; | |||
| @@ -26,6 +26,7 @@ | |||
| #include "runtime/rt.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/convert_utils.h" | |||
| #include "utils/context/ms_context.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| @@ -123,8 +124,8 @@ std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &in | |||
| MS_LOG(DEBUG) << "The block_dim is:" << block_dim; | |||
| TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>( | |||
| stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs, | |||
| output_data_addrs, workspace_addrs); | |||
| kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, | |||
| input_data_addrs, output_data_addrs, workspace_addrs, NeedDump()); | |||
| return {task_info_ptr}; | |||
| } | |||
| } // namespace kernel | |||
| @@ -21,6 +21,9 @@ | |||
| #include <memory> | |||
| #include "framework/ge_runtime/task_info.h" | |||
| #include "kernel/kernel.h" | |||
| #ifdef ENABLE_DATA_DUMP | |||
| #include "debug/data_dump_parser.h" | |||
| #endif | |||
| using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>; | |||
| namespace mindspore { | |||
| @@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod { | |||
| const std::vector<AddressPtr> &, uint32_t) = 0; | |||
| uint32_t block_dim() { return block_dim_; } | |||
| uint32_t stream_id() { return stream_id_; } | |||
| virtual bool NeedDump() { | |||
| #ifdef ENABLE_DATA_DUMP | |||
| return DataDumpParser::GetInstance().NeedDump(kernel_name_); | |||
| #else | |||
| return false; | |||
| #endif | |||
| } | |||
| protected: | |||
| uint32_t block_dim_{1}; | |||
| @@ -18,6 +18,7 @@ | |||
| #include "device/ascend/tasksink/runtime_utils.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "utils/utils.h" | |||
| #include "utils/context/ms_context.h" | |||
| using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>; | |||
| using ge::model_runner::HcclTaskInfo; | |||
| @@ -146,10 +147,12 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu | |||
| << ", root_id=" << root_id_ << ", op_type=" << static_cast<int>(op_type_) | |||
| << ", data_type=" << static_cast<int>(data_type); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| HcclTaskInfoPtr task_info_ptr = std::make_shared<HcclTaskInfo>( | |||
| stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, private_def, nullptr, | |||
| hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, RuntimeUtils::HcomUnbindModel, | |||
| RuntimeUtils::HcomDistribute); | |||
| kernel_name_, stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, | |||
| private_def, nullptr, hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, | |||
| RuntimeUtils::HcomUnbindModel, RuntimeUtils::HcomDistribute, NeedDump()); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -129,6 +129,10 @@ class KernelMod { | |||
| virtual std::vector<size_t> GenParameters() { return {}; } | |||
| virtual ~KernelMod() = default; | |||
| void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } | |||
| protected: | |||
| std::string kernel_name_; | |||
| }; | |||
| using KernelModPtr = std::shared_ptr<KernelMod>; | |||
| } // namespace kernel | |||
| @@ -58,8 +58,9 @@ std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &in | |||
| } | |||
| stream_id_ = stream_id; | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>( | |||
| stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE); | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = | |||
| std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, | |||
| inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -55,7 +55,8 @@ std::vector<TaskInfoPtr> LabelGotoKernel::GenTask(const std::vector<AddressPtr> | |||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | |||
| MS_LOG(INFO) << "LabelGotoKernel GenTask label:" << label_ << ", stream id:" << stream_id; | |||
| std::vector<TaskInfoPtr> task_info_list; | |||
| std::shared_ptr<LabelGotoTaskInfo> task_info_ptr = std::make_shared<LabelGotoTaskInfo>(stream_id, label_); | |||
| std::shared_ptr<LabelGotoTaskInfo> task_info_ptr = | |||
| std::make_shared<LabelGotoTaskInfo>(kernel_name_, stream_id, label_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| task_info_list.emplace_back(task_info_ptr); | |||
| return task_info_list; | |||
| @@ -55,7 +55,7 @@ std::vector<TaskInfoPtr> LabelSetKernel::GenTask(const std::vector<AddressPtr> & | |||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | |||
| MS_LOG(INFO) << "LabelSetKernel GenTask label:" << label_ << ", stream id:" << stream_id; | |||
| std::vector<TaskInfoPtr> task_info_list; | |||
| std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(stream_id, label_); | |||
| std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(kernel_name_, stream_id, label_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| task_info_list.emplace_back(task_info_ptr); | |||
| return task_info_list; | |||
| @@ -67,7 +67,7 @@ std::vector<TaskInfoPtr> LabelSwitchKernel::GenTask(const std::vector<AddressPtr | |||
| MS_LOG(INFO) << "LabelSwitchKernel GenTask label size:" << label_size_ << ", stream id:" << stream_id; | |||
| std::vector<TaskInfoPtr> task_info_list; | |||
| cond_ = inputs[0]->addr; | |||
| auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(stream_id, label_size_, label_list_, cond_); | |||
| auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(kernel_name_, stream_id, label_size_, label_list_, cond_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| task_info_list.emplace_back(task_info_ptr); | |||
| return task_info_list; | |||
| @@ -23,6 +23,7 @@ | |||
| #include "common/utils.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "common/trans.h" | |||
| #include "utils/context/ms_context.h" | |||
| using ge::model_runner::MemcpyAsyncTaskInfo; | |||
| using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>; | |||
| @@ -118,8 +119,9 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr | |||
| } | |||
| stream_id_ = stream_id; | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>( | |||
| stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE); | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = | |||
| std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, | |||
| inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -63,7 +63,7 @@ std::vector<TaskInfoPtr> ProfilingKernelMod::GenTask(const std::vector<AddressPt | |||
| << ", outputs size:" << outputs.size(); | |||
| stream_id_ = stream_id; | |||
| std::shared_ptr<ProfilerTraceTaskInfo> task_info_ptr = | |||
| std::make_shared<ProfilerTraceTaskInfo>(stream_id, log_id_, notify_, flags_); | |||
| std::make_shared<ProfilerTraceTaskInfo>(kernel_name_, stream_id, log_id_, notify_, flags_); | |||
| return {task_info_ptr}; | |||
| } | |||
| } // namespace kernel | |||
| @@ -60,7 +60,7 @@ std::vector<TaskInfoPtr> RecvKernel::GenTask(const std::vector<AddressPtr> &, co | |||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | |||
| MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id; | |||
| stream_id_ = stream_id; | |||
| EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(stream_id, event_id_); | |||
| EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(kernel_name_, stream_id, event_id_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -57,7 +57,7 @@ std::vector<TaskInfoPtr> SendKernel::GenTask(const std::vector<AddressPtr> &, co | |||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | |||
| MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id; | |||
| stream_id_ = stream_id; | |||
| EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(stream_id, event_id_); | |||
| EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(kernel_name_, stream_id, event_id_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -72,7 +72,8 @@ std::vector<TaskInfoPtr> StreamActiveKernel::GenTask(const std::vector<AddressPt | |||
| stream_id_ = stream_id; | |||
| std::vector<TaskInfoPtr> task_info_list; | |||
| for (auto &index : active_streams_index_) { | |||
| std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = std::make_shared<StreamActiveTaskInfo>(stream_id, index); | |||
| std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = | |||
| std::make_shared<StreamActiveTaskInfo>(kernel_name_, stream_id, index); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| task_info_list.emplace_back(task_info_ptr); | |||
| MS_LOG(INFO) << "StreamActiveKernel GenTask: streamId:" << stream_id << ", Active streamId:" << index; | |||
| @@ -91,8 +91,8 @@ std::vector<TaskInfoPtr> StreamSwitchKernel::GenTask(const std::vector<AddressPt | |||
| auto ites_per_loop = inputs[1]->addr; | |||
| MS_LOG(INFO) << "cond_:" << static_cast<int>(cond_) << ", true_stream_index_:" << true_stream_index_ | |||
| << ", stream_id:" << stream_id; | |||
| std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr = | |||
| std::make_shared<StreamSwitchTaskInfo>(stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); | |||
| std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr = std::make_shared<StreamSwitchTaskInfo>( | |||
| kernel_name_, stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -17,7 +17,7 @@ | |||
| #include "kernel/tbe/tbe_kernel_mod.h" | |||
| #include <algorithm> | |||
| #include "runtime/rt.h" | |||
| #include "nlohmann/json.hpp" | |||
| #include "utils/context/ms_context.h" | |||
| #include "graphengine/inc/framework/ge_runtime/task_info.h" | |||
| namespace mindspore { | |||
| @@ -99,9 +99,9 @@ std::vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &in | |||
| MS_LOG(INFO) << "block_dim is:" << block_dim_; | |||
| TbeTaskInfoPtr task_info_ptr = | |||
| make_shared<ge::model_runner::TbeTaskInfo>(stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, | |||
| meta_data, input_data_addrs, output_data_addrs, workspace_addrs); | |||
| TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>( | |||
| kernel_name_, stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, meta_data, input_data_addrs, | |||
| output_data_addrs, workspace_addrs, NeedDump()); | |||
| return {task_info_ptr}; | |||
| } | |||
| @@ -36,7 +36,7 @@ namespace session { | |||
| using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>; | |||
| class KernelGraph : public FuncGraph { | |||
| public: | |||
| KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false) { | |||
| KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false), current_epoch_(0) { | |||
| inputs_ = std::make_shared<std::vector<AnfNodePtr>>(); | |||
| execution_order_ = {}; | |||
| executable_ = true; | |||
| @@ -154,6 +154,8 @@ class KernelGraph : public FuncGraph { | |||
| AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const; | |||
| void AddFinalOutputKernel(const AnfNodePtr &node); | |||
| bool IsFinalOutputKernel(const AnfNodePtr &node) const; | |||
| uint32_t current_epoch() const { return current_epoch_; } | |||
| void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; } | |||
| private: | |||
| // remove value node form graph | |||
| @@ -216,6 +218,7 @@ class KernelGraph : public FuncGraph { | |||
| std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_; | |||
| std::unordered_map<AnfNodePtr, AnfNodePtr> internal_outputs_to_front_map_; | |||
| std::set<AnfNodePtr> final_output_kernels_; | |||
| uint32_t current_epoch_; | |||
| }; | |||
| } // namespace session | |||
| using KernelGraphPtr = std::shared_ptr<session::KernelGraph>; | |||
| @@ -187,6 +187,18 @@ size_t LoadCtrlInputTensor(const std::shared_ptr<KernelGraph> &graph, std::vecto | |||
| // set loop_count to zero | |||
| MS_EXCEPTION_IF_NULL(inputs); | |||
| inputs->push_back(tensor); | |||
| auto epoch_tensor = (*inputs_params)[1]; | |||
| MS_EXCEPTION_IF_NULL(epoch_tensor); | |||
| auto *epoch_val = static_cast<int32_t *>(epoch_tensor->data_c()); | |||
| MS_EXCEPTION_IF_NULL(epoch_val); | |||
| *epoch_val = graph->current_epoch(); | |||
| epoch_tensor->set_dirty(true); | |||
| inputs->push_back(epoch_tensor); | |||
| MS_LOG(INFO) << "Load epoch_val:" << *epoch_val; | |||
| graph->set_current_epoch(graph->current_epoch() + 1); | |||
| return inputs_params->size(); | |||
| } | |||
| @@ -814,13 +826,13 @@ void SessionBasic::AddParameterToGraphInputs(const std::vector<AnfNodePtr> ¶ | |||
| void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, | |||
| const std::vector<tensor::TensorPtr> &inputs_const) const { | |||
| std::vector<tensor::TensorPtr> inputs(inputs_const); | |||
| size_t input_ctrl_size = 1; | |||
| size_t input_ctrl_size = 2; | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| if (kernel_graph->input_ctrl_tensors()) { | |||
| input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs); | |||
| } | |||
| auto input_nodes = kernel_graph->inputs(); | |||
| if ((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) { | |||
| if ((inputs.size() + input_ctrl_size) - 2 != input_nodes.size()) { | |||
| MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() | |||
| << ", input_ctrl_size:" << input_ctrl_size; | |||
| } | |||
| @@ -32,6 +32,8 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint | |||
| bool ModelRunner::UnloadModel(uint32_t model_id) { return true; } | |||
| bool ModelRunner::LoadModelComplete(uint32_t model_id) { return true; } | |||
| bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, ge::OutputData *output_data) { | |||
| return true; | |||
| } | |||
| @@ -45,6 +47,11 @@ const std::vector<uint32_t> &ModelRunner::GetStreamIdList(uint32_t model_id) con | |||
| static std::vector<uint32_t> stream_id_list; | |||
| return stream_id_list; | |||
| } | |||
| const std::map<std::string, std::shared_ptr<RuntimeInfo>> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const { | |||
| static std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map; | |||
| return runtime_info_map; | |||
| } | |||
| } // namespace model_runner | |||
| } // namespace ge | |||
| @@ -15,7 +15,6 @@ | |||
| */ | |||
| #include "device/ascend/ascend_stream_assign.h" | |||
| #include "device/ascend/ascend_label_assign.h" | |||
| #include "device/ascend/tasksink/task_generator.h" | |||
| #include "device/kernel_adjust.h" | |||
| namespace mindspore { | |||
| @@ -31,13 +30,6 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr) | |||
| void AscendStreamAssign::GetWaitStreams(vector<uint32_t> *wait_active_stream_list) { return; } | |||
| void AscendStreamAssign::GetHcomStreams(std::vector<uint32_t> *streams) { return; } | |||
| namespace tasksink { | |||
| bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list, | |||
| uint32_t graph_id) { | |||
| return true; | |||
| } | |||
| } // namespace tasksink | |||
| } // namespace ascend | |||
| void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; } | |||
| bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return true; } | |||
| @@ -0,0 +1,30 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "device/ascend/tasksink/task_generator.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| namespace tasksink { | |||
| bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list, | |||
| uint32_t graph_id) { | |||
| return true; | |||
| } | |||
| } // namespace tasksink | |||
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||