| @@ -24,7 +24,7 @@ usage() | |||||
| { | { | ||||
| echo "Usage:" | echo "Usage:" | ||||
| echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" | echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" | ||||
| echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | |||||
| echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | |||||
| echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]" | echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]" | ||||
| echo "" | echo "" | ||||
| echo "Options:" | echo "Options:" | ||||
| @@ -48,6 +48,7 @@ usage() | |||||
| echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" | echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" | ||||
| echo " -Q Enable dump memory, default off" | echo " -Q Enable dump memory, default off" | ||||
| echo " -D Enable dumping of function graph ir, default on" | echo " -D Enable dumping of function graph ir, default on" | ||||
| echo " -S Enable async data dump, default off" | |||||
| echo " -z Compile dataset & mindrecord, default on" | echo " -z Compile dataset & mindrecord, default on" | ||||
| echo " -M Enable MPI and NCCL for GPU training, gpu default on" | echo " -M Enable MPI and NCCL for GPU training, gpu default on" | ||||
| echo " -V Specify the minimum required cuda version, default CUDA 10.1" | echo " -V Specify the minimum required cuda version, default CUDA 10.1" | ||||
| @@ -88,6 +89,7 @@ checkopts() | |||||
| ENABLE_TIMELINE="off" | ENABLE_TIMELINE="off" | ||||
| ENABLE_DUMP2PROTO="on" | ENABLE_DUMP2PROTO="on" | ||||
| ENABLE_DUMPE2E="off" | ENABLE_DUMPE2E="off" | ||||
| ENABLE_DATA_DUMP="off" | |||||
| ENABLE_DUMP_IR="on" | ENABLE_DUMP_IR="on" | ||||
| COMPILE_MINDDATA="on" | COMPILE_MINDDATA="on" | ||||
| ENABLE_MPI="off" | ENABLE_MPI="off" | ||||
| @@ -102,7 +104,7 @@ checkopts() | |||||
| ENABLE_PYTHON="on" | ENABLE_PYTHON="on" | ||||
| # Process the options | # Process the options | ||||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt | |||||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt | |||||
| do | do | ||||
| OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') | OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') | ||||
| case "${opt}" in | case "${opt}" in | ||||
| @@ -218,6 +220,11 @@ checkopts() | |||||
| ENABLE_DUMPE2E="$OPTARG" | ENABLE_DUMPE2E="$OPTARG" | ||||
| echo "enable dump end to end" | echo "enable dump end to end" | ||||
| ;; | ;; | ||||
| S) | |||||
| check_on_off $OPTARG S | |||||
| ENABLE_DATA_DUMP="$OPTARG" | |||||
| echo "enable data dump" | |||||
| ;; | |||||
| D) | D) | ||||
| check_on_off $OPTARG D | check_on_off $OPTARG D | ||||
| ENABLE_DUMP_IR="$OPTARG" | ENABLE_DUMP_IR="$OPTARG" | ||||
| @@ -321,6 +328,9 @@ build_mindspore() | |||||
| if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then | if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then | ||||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" | CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" | ||||
| fi | fi | ||||
| if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then | |||||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON" | |||||
| fi | |||||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" | CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" | ||||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" | CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" | ||||
| if [[ "X$ENABLE_MPI" = "Xon" ]]; then | if [[ "X$ENABLE_MPI" = "Xon" ]]; then | ||||
| @@ -116,6 +116,10 @@ if(ENABLE_DUMP_E2E) | |||||
| add_compile_definitions(ENABLE_DUMP_E2E) | add_compile_definitions(ENABLE_DUMP_E2E) | ||||
| endif() | endif() | ||||
| if(ENABLE_DATA_DUMP) | |||||
| add_compile_definitions(ENABLE_DATA_DUMP) | |||||
| endif() | |||||
| if(ENABLE_DEBUGGER) | if(ENABLE_DEBUGGER) | ||||
| add_compile_definitions(ENABLE_DEBUGGER) | add_compile_definitions(ENABLE_DEBUGGER) | ||||
| endif() | endif() | ||||
| @@ -0,0 +1,15 @@ | |||||
| { | |||||
| "DumpSettings": { | |||||
| "net_name": "ResNet50", | |||||
| "mode": 1, | |||||
| "iteration": 0, | |||||
| "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] | |||||
| }, | |||||
| "DumpSettingsSpec": { | |||||
| "net_name": "net name eg:ResNet50", | |||||
| "mode": "0: dump all kernels, 1: dump kernels in kernels list", | |||||
| "iteration": "specified iteration ", | |||||
| "kernels": "full scope names of the ops that need to be dumped" | |||||
| } | |||||
| } | |||||
| @@ -1 +1 @@ | |||||
| Subproject commit 1c2672868fda8b1d012c99e5aca73725ac869ba9 | |||||
| Subproject commit 18cf690152add623ffbddfbbb4674d1b34484ca7 | |||||
| @@ -109,8 +109,12 @@ if (ENABLE_D) | |||||
| file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto") | file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto") | ||||
| ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER}) | ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER}) | ||||
# Generate C++ sources for the Ascend data-dump .proto files.
# The second argument is given its own DUMP_-prefixed name, mirroring PREDICT_PROTOSRCS /
# PREDICT_PROTOHDRS above, so this call no longer writes into the shared PROTOHDRS variable.
# NOTE(review): assumes the second argument of ms_protobuf_generate is the output header list -- confirm.
file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/dump/proto/*.proto")
ms_protobuf_generate(DUMP_PROTOSRCS DUMP_PROTOHDRS ${PROTO_DUMP})
| list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS}) | list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS}) | ||||
| list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS}) | list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS}) | ||||
| list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS}) | |||||
| add_compile_definitions(ENABLE_D) | add_compile_definitions(ENABLE_D) | ||||
| endif () | endif () | ||||
| @@ -19,6 +19,15 @@ if (ENABLE_DEBUGGER) | |||||
| ) | ) | ||||
| endif (ENABLE_DEBUGGER) | endif (ENABLE_DEBUGGER) | ||||
# common.cc holds the Common::GetRealPath / Common::GetConfigFile helpers. e2e_dump.cc (built under
# ENABLE_DUMP_E2E) calls Common::GetRealPath as well, so common.cc must be compiled whenever either
# feature is enabled; otherwise an e2e-dump build without ENABLE_D fails at link time.
if (ENABLE_D OR ENABLE_DUMP_E2E)
    list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/common.cc")
endif ()

# The async data-dump config parser is only built for Ascend builds with data dump enabled.
if (ENABLE_D AND ENABLE_DATA_DUMP)
    list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc")
endif ()
| if (ENABLE_DUMP_E2E) | if (ENABLE_DUMP_E2E) | ||||
| list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc") | list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc") | ||||
| endif (ENABLE_DUMP_E2E) | endif (ENABLE_DUMP_E2E) | ||||
| @@ -0,0 +1,125 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "debug/common.h" | |||||
| #include <memory> | |||||
| #include <optional> | |||||
| #include "utils/system/env.h" | |||||
| #include "utils/system/file_system.h" | |||||
| #include "utils/log_adapter.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| namespace mindspore { | |||||
| std::optional<std::string> Common::GetRealPath(const std::string &input_path) { | |||||
| std::string out_path; | |||||
| auto path_split_pos = input_path.find_last_of('/'); | |||||
| if (path_split_pos == std::string::npos) { | |||||
| path_split_pos = input_path.find_last_of('\\'); | |||||
| } | |||||
| // get real path | |||||
| char real_path[PATH_MAX] = {0}; | |||||
| if (path_split_pos != std::string::npos) { | |||||
| std::string prefix_path = input_path.substr(0, path_split_pos); | |||||
| if (prefix_path.length() >= PATH_MAX) { | |||||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||||
| return std::nullopt; | |||||
| } | |||||
| std::string last_path = input_path.substr(path_split_pos, input_path.length() - path_split_pos); | |||||
| auto ret = CreateNotExistDirs(prefix_path); | |||||
| if (!ret) { | |||||
| MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; | |||||
| return std::nullopt; | |||||
| } | |||||
| if (nullptr == realpath(prefix_path.c_str(), real_path)) { | |||||
| MS_LOG(ERROR) << "dir " << prefix_path << " does not exit."; | |||||
| return std::nullopt; | |||||
| } | |||||
| out_path = std::string(real_path) + last_path; | |||||
| } | |||||
| if (path_split_pos == std::string::npos) { | |||||
| if (input_path.length() >= PATH_MAX) { | |||||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||||
| return std::nullopt; | |||||
| } | |||||
| if (nullptr == realpath(input_path.c_str(), real_path)) { | |||||
| MS_LOG(ERROR) << "File " << input_path << " does not exit, it will be created."; | |||||
| } | |||||
| out_path = std::string(real_path); | |||||
| } | |||||
| return out_path; | |||||
| } | |||||
| bool Common::CreateNotExistDirs(const std::string &path) { | |||||
| std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem(); | |||||
| MS_EXCEPTION_IF_NULL(fs); | |||||
| char temp_path[PATH_MAX] = {0}; | |||||
| if (path.length() > PATH_MAX) { | |||||
| MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; | |||||
| return false; | |||||
| } | |||||
| for (uint32_t i = 0; i < path.length(); i++) { | |||||
| temp_path[i] = path[i]; | |||||
| if (temp_path[i] == '\\' || temp_path[i] == '/') { | |||||
| if (i != 0) { | |||||
| char tmp_char = temp_path[i]; | |||||
| temp_path[i] = '\0'; | |||||
| std::string path_handle(temp_path); | |||||
| if (!fs->FileExist(temp_path)) { | |||||
| MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; | |||||
| if (!fs->CreateDir(temp_path)) { | |||||
| MS_LOG(ERROR) << "Create " << path_handle << " dir error"; | |||||
| return false; | |||||
| } | |||||
| } | |||||
| temp_path[i] = tmp_char; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (!fs->FileExist(path)) { | |||||
| MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; | |||||
| if (!fs->CreateDir(path)) { | |||||
| MS_LOG(ERROR) << "Create " << path << " dir error"; | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::optional<std::string> Common::GetConfigFile(const std::string &env) { | |||||
| if (env.empty()) { | |||||
| MS_LOG(EXCEPTION) << "Invalid env"; | |||||
| } | |||||
| auto config_path_str = std::getenv(env.c_str()); | |||||
| if (config_path_str == nullptr) { | |||||
| MS_LOG(ERROR) << "Please export env:" << env; | |||||
| return {}; | |||||
| } | |||||
| MS_LOG(INFO) << "Async Dump Getenv env:" << env << "=" << config_path_str; | |||||
| std::string dump_config_file(config_path_str); | |||||
| std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem(); | |||||
| MS_EXCEPTION_IF_NULL(fs); | |||||
| if (!fs->FileExist(dump_config_file)) { | |||||
| MS_LOG(ERROR) << dump_config_file << " not exist."; | |||||
| return {}; | |||||
| } | |||||
| return dump_config_file; | |||||
| } | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,36 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ | |||||
| #define MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ | |||||
| #include <string> | |||||
| #include <optional> | |||||
| #include "utils/contract.h" | |||||
| namespace mindspore { | |||||
// Static helpers shared by the dump/debug code: path resolution and config-file lookup.
class Common {
 public:
  Common() = default;
  ~Common() = default;

  // Returns the config-file path stored in environment variable `env`,
  // or an empty optional when the variable is unset or the file does not exist.
  static std::optional<std::string> GetConfigFile(const std::string &env);

  // Returns the resolved form of `input_path` (creating missing parent directories),
  // or an empty optional on failure.
  static std::optional<std::string> GetRealPath(const std::string &input_path);

 private:
  // Creates `path` and any missing parent directories; returns false on failure.
  static bool CreateNotExistDirs(const std::string &path);
};
| } // namespace mindspore | |||||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ | |||||
| @@ -0,0 +1,152 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "debug/data_dump_parser.h" | |||||
| #include <fstream> | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "debug/common.h" | |||||
// Names of the environment variables read by DataDumpParser.
// NOTE(review): "Ptah" in kDataDumpConfigPtah looks like a typo for "Path"; the identifier is kept
// unchanged because ParseDumpConfig() refers to it by this name.
constexpr const char *kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";  // path of the dump config json
constexpr const char *kEnableDataDump = "ENABLE_DATA_DUMP";           // must be "1" to enable dumping
constexpr const char *kDataDumpPath = "DATA_DUMP_PATH";               // value returned by GetDumpPath()
| namespace mindspore { | |||||
| void DataDumpParser::ResetParam() { | |||||
| enable_ = false; | |||||
| net_name_.clear(); | |||||
| dump_mode_ = 0; | |||||
| dump_step_ = 0; | |||||
| kernel_set_.clear(); | |||||
| } | |||||
| bool DataDumpParser::DumpEnabled() const { | |||||
| auto enable_dump = std::getenv(kEnableDataDump); | |||||
| if (!enable_dump) { | |||||
| MS_LOG(WARNING) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP"; | |||||
| return false; | |||||
| } | |||||
| auto enabled = std::atoi(enable_dump); | |||||
| if (enabled != 1) { | |||||
| MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1"; | |||||
| return false; | |||||
| } | |||||
| auto context = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context); | |||||
| if (context->execution_mode() == kPynativeMode) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump"; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::optional<std::string> DataDumpParser::GetDumpPath() const { | |||||
| auto dump_path = std::getenv(kDataDumpPath); | |||||
| if (!dump_path) { | |||||
| MS_LOG(ERROR) << "[DataDump] dump path is null. Please export DATA_DUMP_PATH"; | |||||
| return {}; | |||||
| } | |||||
| std::string dump_path_str(dump_path); | |||||
| return dump_path_str; | |||||
| } | |||||
| void DataDumpParser::ParseDumpConfig() { | |||||
| std::lock_guard<std::mutex> guard(lock_); | |||||
| MS_LOG(INFO) << "[DataDump] parse start"; | |||||
| if (!DumpEnabled()) { | |||||
| MS_LOG(INFO) << "[DataDump] dump not enable"; | |||||
| return; | |||||
| } | |||||
| ResetParam(); | |||||
| auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah); | |||||
| if (!dump_config_file.has_value()) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] Get config file failed"; | |||||
| } | |||||
| std::ifstream json_file(dump_config_file.value()); | |||||
| if (!json_file.is_open()) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed."; | |||||
| } | |||||
| nlohmann::json j; | |||||
| json_file >> j; | |||||
| if (j.find("DumpSettings") == j.end()) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist."; | |||||
| } | |||||
| nlohmann::json dump_settings = j.at("DumpSettings"); | |||||
| // convert json to string | |||||
| std::stringstream ss; | |||||
| ss << dump_settings; | |||||
| std::string cfg = ss.str(); | |||||
| MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg; | |||||
| if (!IsConfigExist(dump_settings)) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid"; | |||||
| } | |||||
| if (!ParseDumpSetting(dump_settings)) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed"; | |||||
| } | |||||
| } | |||||
| bool DataDumpParser::NeedDump(const std::string &op_full_name) const { | |||||
| if (!DumpEnabled()) { | |||||
| return false; | |||||
| } | |||||
| if (dump_mode_ == 0) { | |||||
| return true; | |||||
| } | |||||
| auto iter = kernel_set_.find(op_full_name); | |||||
| return iter != kernel_set_.end(); | |||||
| } | |||||
| bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const { | |||||
| if (dump_settings.find("mode") == dump_settings.end() || dump_settings.find("net_name") == dump_settings.end() || | |||||
| dump_settings.find("iteration") == dump_settings.end() || dump_settings.find("kernels") == dump_settings.end()) { | |||||
| MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) { | |||||
| auto mode = dump_settings.at("mode"); | |||||
| auto net_name = dump_settings.at("net_name"); | |||||
| auto iteration = dump_settings.at("iteration"); | |||||
| auto kernels = dump_settings.at("kernels"); | |||||
| if (!(mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array())) { | |||||
| MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid."; | |||||
| enable_ = false; | |||||
| return false; | |||||
| } | |||||
| enable_ = true; | |||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| dump_mode_ = mode; | |||||
| net_name_ = net_name; | |||||
| dump_step_ = iteration; | |||||
| for (const auto &kernel : kernels) { | |||||
| auto kernel_str = kernel.dump(); | |||||
| kernel_str.erase(std::remove(kernel_str.begin(), kernel_str.end(), '\"'), kernel_str.end()); | |||||
| MS_LOG(INFO) << "[DataDump] Need dump kernel:" << kernel_str; | |||||
| kernel_set_.insert(kernel_str); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,61 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||||
| #define MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||||
| #include <string> | |||||
| #include <set> | |||||
| #include <mutex> | |||||
| #include <optional> | |||||
| #include "nlohmann/json.hpp" | |||||
| #include "common/utils.h" | |||||
| namespace mindspore { | |||||
| class DataDumpParser { | |||||
| public: | |||||
| static DataDumpParser &GetInstance() { | |||||
| static DataDumpParser instance; | |||||
| return instance; | |||||
| } | |||||
| void ParseDumpConfig(); | |||||
| bool NeedDump(const std::string &op_full_name) const; | |||||
| bool DumpEnabled() const; | |||||
| std::optional<std::string> GetDumpPath() const; | |||||
| bool enable() const { return enable_; } | |||||
| const std::string &net_name() const { return net_name_; } | |||||
| uint32_t dump_mode() const { return dump_mode_; } | |||||
| uint32_t dump_step() const { return dump_step_; } | |||||
| const std::set<std::string> &kernel_set() const { return kernel_set_; } | |||||
| private: | |||||
| DataDumpParser() = default; | |||||
| virtual ~DataDumpParser() = default; | |||||
| DISABLE_COPY_AND_ASSIGN(DataDumpParser); | |||||
| void ResetParam(); | |||||
| bool IsConfigExist(const nlohmann::json &dump_settings) const; | |||||
| bool ParseDumpSetting(const nlohmann::json &dump_settings); | |||||
| std::mutex lock_; | |||||
| bool enable_{false}; | |||||
| std::string net_name_; | |||||
| uint32_t dump_mode_{0}; | |||||
| uint32_t dump_step_{0}; | |||||
| std::set<std::string> kernel_set_; | |||||
| }; | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ | |||||
| @@ -17,12 +17,14 @@ | |||||
| #include <limits.h> | #include <limits.h> | ||||
| #include <fstream> | #include <fstream> | ||||
| #include <string> | #include <string> | ||||
| #include <optional> | |||||
| #include <nlohmann/json.hpp> | #include <nlohmann/json.hpp> | ||||
| #include "utils/log_adapter.h" | #include "utils/log_adapter.h" | ||||
| #include "utils/system/file_system.h" | #include "utils/system/file_system.h" | ||||
| #include "utils/system/env.h" | #include "utils/system/env.h" | ||||
| #include "utils/convert_utils.h" | #include "utils/convert_utils.h" | ||||
| #include "utils/context/ms_context.h" | #include "utils/context/ms_context.h" | ||||
| #include "debug/common.h" | |||||
| using json = nlohmann::json; | using json = nlohmann::json; | ||||
| @@ -158,100 +160,19 @@ bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len) | |||||
| return false; | return false; | ||||
| } | } | ||||
| std::string realpath; | |||||
| bool ret = GetRealPath(filename, &realpath); | |||||
| if (!ret) { | |||||
| auto realpath = Common::GetRealPath(filename); | |||||
| if (!realpath.has_value()) { | |||||
| MS_LOG(ERROR) << "Get real path failed."; | MS_LOG(ERROR) << "Get real path failed."; | ||||
| return false; | return false; | ||||
| } | } | ||||
| std::ofstream fd; | std::ofstream fd; | ||||
| fd.open(realpath, std::ios::binary | std::ios::out); | |||||
| fd.open(realpath.value(), std::ios::binary | std::ios::out); | |||||
| if (!fd.is_open()) { | if (!fd.is_open()) { | ||||
| MS_LOG(ERROR) << "Open file " << realpath << " fail."; | |||||
| MS_LOG(ERROR) << "Open file " << realpath.value() << " fail."; | |||||
| return false; | return false; | ||||
| } | } | ||||
| (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len)); | (void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len)); | ||||
| fd.close(); | fd.close(); | ||||
| return true; | return true; | ||||
| } | } | ||||
| bool Dump::GetRealPath(const std::string &inpath, std::string *outpath) { | |||||
| MS_EXCEPTION_IF_NULL(outpath); | |||||
| auto path_split_pos = inpath.find_last_of('/'); | |||||
| if (path_split_pos == std::string::npos) { | |||||
| path_split_pos = inpath.find_last_of('\\'); | |||||
| } | |||||
| // get real path | |||||
| char real_path[PATH_MAX] = {0}; | |||||
| if (path_split_pos != std::string::npos) { | |||||
| std::string prefix_path = inpath.substr(0, path_split_pos); | |||||
| if (prefix_path.length() >= PATH_MAX) { | |||||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||||
| return false; | |||||
| } | |||||
| std::string last_path = inpath.substr(path_split_pos, inpath.length() - path_split_pos); | |||||
| auto ret = CreateNotExistDirs(prefix_path); | |||||
| if (ret == false) { | |||||
| MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; | |||||
| return false; | |||||
| } | |||||
| if (nullptr == realpath(prefix_path.c_str(), real_path)) { | |||||
| MS_LOG(ERROR) << "dir " << prefix_path << " does not exit."; | |||||
| return false; | |||||
| } | |||||
| *outpath = std::string(real_path) + last_path; | |||||
| } | |||||
| if (path_split_pos == std::string::npos) { | |||||
| if (inpath.length() >= PATH_MAX) { | |||||
| MS_LOG(ERROR) << "Prefix path is too longer!"; | |||||
| return false; | |||||
| } | |||||
| if (nullptr == realpath(inpath.c_str(), real_path)) { | |||||
| MS_LOG(ERROR) << "File " << inpath << " does not exit, it will be created."; | |||||
| } | |||||
| *outpath = std::string(real_path); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool Dump::CreateNotExistDirs(const std::string &path) { | |||||
| std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem(); | |||||
| MS_EXCEPTION_IF_NULL(fs); | |||||
| char temp_path[PATH_MAX] = {0}; | |||||
| if (path.length() > PATH_MAX) { | |||||
| MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; | |||||
| return false; | |||||
| } | |||||
| for (uint32_t i = 0; i < path.length(); i++) { | |||||
| temp_path[i] = path[i]; | |||||
| if (temp_path[i] == '\\' || temp_path[i] == '/') { | |||||
| if (i != 0) { | |||||
| char tmp_char = temp_path[i]; | |||||
| temp_path[i] = '\0'; | |||||
| std::string path_handle(temp_path); | |||||
| if (!fs->FileExist(temp_path)) { | |||||
| MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; | |||||
| if (!fs->CreateDir(temp_path)) { | |||||
| MS_LOG(ERROR) << "Create " << path_handle << " dir error"; | |||||
| return false; | |||||
| } | |||||
| } | |||||
| temp_path[i] = tmp_char; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (!fs->FileExist(path)) { | |||||
| MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; | |||||
| if (!fs->CreateDir(path)) { | |||||
| MS_LOG(ERROR) << "Create " << path << " dir error"; | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -59,10 +59,6 @@ class Dump { | |||||
| uint32_t cur_iter_; | uint32_t cur_iter_; | ||||
| std::vector<std::string> dump_kernels_; | std::vector<std::string> dump_kernels_; | ||||
| static bool GetRealPath(const std::string &inpath, std::string *outpath); | |||||
| static bool CreateNotExistDirs(const std::string &path); | |||||
| private: | private: | ||||
| bool ParseDumpConfig(const std::string &dump_config_file); | bool ParseDumpConfig(const std::string &dump_config_file); | ||||
| bool IsConfigExist(const nlohmann::json &dumpSettings); | bool IsConfigExist(const nlohmann::json &dumpSettings); | ||||
| @@ -42,6 +42,7 @@ | |||||
| #include "device/ascend/ascend_memory_manager.h" | #include "device/ascend/ascend_memory_manager.h" | ||||
| #include "debug/tensor_load.h" | #include "debug/tensor_load.h" | ||||
| using ge::model_runner::ModelRunner; | |||||
| using mindspore::device::ascend::ProfilingManager; | using mindspore::device::ascend::ProfilingManager; | ||||
| using mindspore::device::ascend::ProfilingUtils; | using mindspore::device::ascend::ProfilingUtils; | ||||
| using mindspore::device::ascend::tasksink::TaskGenerator; | using mindspore::device::ascend::tasksink::TaskGenerator; | ||||
| @@ -90,9 +91,16 @@ std::string GetRankId() { | |||||
| AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } | AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } | ||||
| void AscendKernelRuntime::ClearGraphModelMap() { | void AscendKernelRuntime::ClearGraphModelMap() { | ||||
| #ifdef ENABLE_DATA_DUMP | |||||
| for (auto &iter : graph_data_dumper_) { | |||||
| MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first; | |||||
| iter.second->UnloadDumpInfo(); | |||||
| } | |||||
| graph_data_dumper_.clear(); | |||||
| #endif | |||||
| for (auto &iter : graph_model_map_) { | for (auto &iter : graph_model_map_) { | ||||
| MS_LOG(INFO) << "Ge UnloadModel " << iter.first; | MS_LOG(INFO) << "Ge UnloadModel " << iter.first; | ||||
| auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter.first); | |||||
| auto ret = ModelRunner::Instance().UnloadModel(iter.first); | |||||
| if (!ret) { | if (!ret) { | ||||
| MS_LOG(ERROR) << "UnloadModel failed"; | MS_LOG(ERROR) << "UnloadModel failed"; | ||||
| } | } | ||||
| @@ -107,7 +115,7 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { | |||||
| return; | return; | ||||
| } | } | ||||
| MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first; | MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first; | ||||
| auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter->first); | |||||
| auto ret = ModelRunner::Instance().UnloadModel(iter->first); | |||||
| if (!ret) { | if (!ret) { | ||||
| MS_LOG(ERROR) << "UnloadModel failed"; | MS_LOG(ERROR) << "UnloadModel failed"; | ||||
| } | } | ||||
| @@ -159,6 +167,10 @@ bool AscendKernelRuntime::Init() { | |||||
| } | } | ||||
| #endif | #endif | ||||
| #ifdef ENABLE_DATA_DUMP | |||||
| DataDumpParser::GetInstance().ParseDumpConfig(); | |||||
| #endif | |||||
| // Start up profiling before rtSetDevice | // Start up profiling before rtSetDevice | ||||
| ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); | ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); | ||||
| if (!ret) { | if (!ret) { | ||||
| @@ -440,7 +452,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { | |||||
| << ", wait_active_stream_list size:" << wait_active_stream_list.size() | << ", wait_active_stream_list size:" << wait_active_stream_list.size() | ||||
| << ", force_copy_stream_list size:" << force_copy_stream_list.size(); | << ", force_copy_stream_list size:" << force_copy_stream_list.size(); | ||||
| std::vector<std::shared_ptr<ge::model_runner::OpInfo>> empty_list; | std::vector<std::shared_ptr<ge::model_runner::OpInfo>> empty_list; | ||||
| std::shared_ptr<ge::model_runner::DavinciModel> model = std::make_shared<ge::model_runner::DavinciModel>( | |||||
| auto model = std::make_shared<ge::model_runner::DavinciModel>( | |||||
| task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0, | task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0, | ||||
| 0, 0, 0, 0, 0, resource_manager.get_cur_stream_num(), label_assign_instance.GetLabelNum(NOT_NULL(graph)), | 0, 0, 0, 0, 0, resource_manager.get_cur_stream_num(), label_assign_instance.GetLabelNum(NOT_NULL(graph)), | ||||
| resource_manager.get_cur_event_num(), 0); | resource_manager.get_cur_event_num(), 0); | ||||
| @@ -477,21 +489,45 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { | |||||
| std::shared_ptr<ge::ModelListener> listener; | std::shared_ptr<ge::ModelListener> listener; | ||||
| MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first; | MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first; | ||||
| bool status = ge::model_runner::ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, | |||||
| model_iter->second, listener); | |||||
| bool status = | |||||
| ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener); | |||||
| if (!status) { | if (!status) { | ||||
| MS_LOG(EXCEPTION) << "Load Task Failed"; | MS_LOG(EXCEPTION) << "Load Task Failed"; | ||||
| } | } | ||||
| if (ProfilingManager::GetInstance().IsProfiling()) { | if (ProfilingManager::GetInstance().IsProfiling()) { | ||||
| auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first); | |||||
| auto stream_ids = ge::model_runner::ModelRunner::Instance().GetStreamIdList(model_iter->first); | |||||
| auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first); | |||||
| auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first); | |||||
| ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph)); | ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph)); | ||||
| } | } | ||||
| #ifdef ENABLE_DATA_DUMP | |||||
| LaunchDataDump(NOT_NULL(graph)); | |||||
| #endif | |||||
| if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) { | |||||
| MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed"; | |||||
| return false; | |||||
| } | |||||
| return true; | return true; | ||||
| } | } | ||||
| #ifdef ENABLE_DATA_DUMP | |||||
| void AscendKernelRuntime::LaunchDataDump(NotNull<const session::KernelGraph *> graph) { | |||||
| if (!DataDumpParser::GetInstance().DumpEnabled()) { | |||||
| return; | |||||
| } | |||||
| auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph->graph_id()); | |||||
| auto data_dumper = std::make_shared<DataDumper>(graph.get(), runtime_info_map); | |||||
| MS_EXCEPTION_IF_NULL(data_dumper); | |||||
| data_dumper->LoadDumpInfo(); | |||||
| auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper); | |||||
| if (!ret.second) { | |||||
| MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed"; | |||||
| } | |||||
| } | |||||
| #endif | |||||
| void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) { | void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) { | ||||
| auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id); | |||||
| auto task_ids = ModelRunner::Instance().GetTaskIdList(graph_id); | |||||
| auto graph_task_names = ProfilingUtils::graph_kernel_name(); | auto graph_task_names = ProfilingUtils::graph_kernel_name(); | ||||
| auto iter = graph_task_names.find(graph_id); | auto iter = graph_task_names.find(graph_id); | ||||
| if (iter != graph_task_names.end()) { | if (iter != graph_task_names.end()) { | ||||
| @@ -524,7 +560,7 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); | |||||
| bool status = ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); | |||||
| if (!status) { | if (!status) { | ||||
| MS_LOG(ERROR) << "Run task failed"; | MS_LOG(ERROR) << "Run task failed"; | ||||
| DebugTaskIdName(graph->graph_id()); | DebugTaskIdName(graph->graph_id()); | ||||
| @@ -24,6 +24,10 @@ | |||||
| #include "framework/ge_runtime/davinci_model.h" | #include "framework/ge_runtime/davinci_model.h" | ||||
| #include "device/kernel_runtime_manager.h" | #include "device/kernel_runtime_manager.h" | ||||
| #include "session/session_basic.h" | #include "session/session_basic.h" | ||||
| #ifdef ENABLE_DATA_DUMP | |||||
| #include "debug/data_dump_parser.h" | |||||
| #include "device/ascend/dump/data_dumper.h" | |||||
| #endif | |||||
| using ge::model_runner::TaskInfo; | using ge::model_runner::TaskInfo; | ||||
| using std::unordered_map; | using std::unordered_map; | ||||
| @@ -66,6 +70,10 @@ class AscendKernelRuntime : public KernelRuntime { | |||||
| bool initialized_{false}; | bool initialized_{false}; | ||||
| unordered_map<GraphId, vector<std::shared_ptr<TaskInfo>>> task_map_; | unordered_map<GraphId, vector<std::shared_ptr<TaskInfo>>> task_map_; | ||||
| unordered_map<GraphId, std::shared_ptr<ge::model_runner::DavinciModel>> graph_model_map_; | unordered_map<GraphId, std::shared_ptr<ge::model_runner::DavinciModel>> graph_model_map_; | ||||
| #ifdef ENABLE_DATA_DUMP | |||||
| void LaunchDataDump(NotNull<const session::KernelGraph *> graph); | |||||
| unordered_map<GraphId, std::shared_ptr<DataDumper>> graph_data_dumper_; | |||||
| #endif | |||||
| }; | }; | ||||
| MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime); | MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime); | ||||
| @@ -0,0 +1,282 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifdef ENABLE_DATA_DUMP | |||||
| #include "device/ascend/dump/data_dumper.h" | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "utility" | |||||
| #include "session/anf_runtime_algorithm.h" | |||||
| #include "runtime/mem.h" | |||||
| #include "runtime/kernel.h" | |||||
| #include "device/ascend/dump/ge_dump.h" | |||||
| #include "proto/op_mapping_info.pb.h" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "debug/data_dump_parser.h" | |||||
// Values written to OpMappingInfo.flag.
constexpr uint32_t kAicpuLoadFlag{1};
constexpr uint32_t kAicpuUnloadFlag{0};

// Positions of the fields inside a RuntimeInfo tuple (used with std::get<>).
constexpr uint32_t kTupleTaskId{0};
constexpr uint32_t kTupleStreamId{1};
constexpr uint32_t kTupleArgs{2};

// Positions of the control tensors inside KernelGraph::input_ctrl_tensors().
constexpr uint32_t kCurrentStepTensorIndex{0};
constexpr uint32_t kCurrentEpochTensorIndex{1};
constexpr uint32_t kStepsPerEpochTensorIndex{2};
| namespace mindspore { | |||||
| namespace device { | |||||
| namespace ascend { | |||||
| void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task); | |||||
| void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task); | |||||
| void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr); | |||||
| DataDumper::~DataDumper() { | |||||
| ReleaseDevMem(&dev_load_mem_); | |||||
| ReleaseDevMem(&dev_unload_mem_); | |||||
| } | |||||
| void DataDumper::LoadDumpInfo() { | |||||
| MS_LOG(INFO) << "[DataDump] LoadDumpInfo start"; | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_); | |||||
| aicpu::dump::OpMappingInfo dump_info; | |||||
| SetOpMappingInfo(NOT_NULL(&dump_info)); | |||||
| auto kernels = kernel_graph_->execution_order(); | |||||
| for (const auto &kernel : kernels) { | |||||
| MS_EXCEPTION_IF_NULL(kernel); | |||||
| if (!KernelNeedDump(kernel)) { | |||||
| continue; | |||||
| } | |||||
| MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope(); | |||||
| dump_kernel_names_.emplace_back(kernel->fullname_with_scope()); | |||||
| aicpu::dump::Task task; | |||||
| ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task)); | |||||
| MS_EXCEPTION_IF_NULL(dump_info.mutable_task()); | |||||
| dump_info.mutable_task()->Add(std::move(task)); | |||||
| } | |||||
| RtLoadDumpData(dump_info, &dev_load_mem_); | |||||
| load_flag_ = true; | |||||
| MS_LOG(INFO) << "[DataDump] LoadDumpInfo end"; | |||||
| } | |||||
| void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const { | |||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_); | |||||
| auto dump_path = DataDumpParser::GetInstance().GetDumpPath(); | |||||
| if (!dump_path.has_value()) { | |||||
| MS_LOG(EXCEPTION) << "Dump path invalid"; | |||||
| } | |||||
| auto device_id = context_ptr->device_id(); | |||||
| dump_info->set_dump_path(dump_path.value() + "_" + std::to_string(device_id) + "/"); | |||||
| MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path.value(); | |||||
| dump_info->set_model_name(DataDumpParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id())); | |||||
| dump_info->set_dump_step(std::to_string(DataDumpParser::GetInstance().dump_step())); | |||||
| dump_info->set_model_id(kernel_graph_->graph_id()); | |||||
| dump_info->set_flag(kAicpuLoadFlag); | |||||
| const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors(); | |||||
| if (input_ctrl_tensors == nullptr || input_ctrl_tensors->size() < 3) { | |||||
| MS_LOG(INFO) << "[DataDump] Not data sink mode, input_ctrl_tensor"; | |||||
| return; | |||||
| } | |||||
| const auto ¤t_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex); | |||||
| const auto &currnet_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex); | |||||
| const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex); | |||||
| MS_EXCEPTION_IF_NULL(current_step_tensor); | |||||
| MS_EXCEPTION_IF_NULL(currnet_epoch_tensor); | |||||
| MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor); | |||||
| MS_EXCEPTION_IF_NULL(current_step_tensor->device_address()); | |||||
| MS_EXCEPTION_IF_NULL(currnet_epoch_tensor->device_address()); | |||||
| MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address()); | |||||
| void *current_step = current_step_tensor->device_address()->ptr_; | |||||
| void *current_epoch = currnet_epoch_tensor->device_address()->ptr_; | |||||
| void *steps_per_epoch = steps_per_epoch_tensor->device_address()->ptr_; | |||||
| if (current_epoch != nullptr && current_step != nullptr && steps_per_epoch != nullptr) { | |||||
| dump_info->set_step_id_addr(reinterpret_cast<uint64_t>(current_epoch)); | |||||
| dump_info->set_loop_cond_addr(reinterpret_cast<uint64_t>(current_step)); | |||||
| dump_info->set_iterations_per_loop_addr(reinterpret_cast<uint64_t>(steps_per_epoch)); | |||||
| } else { | |||||
| MS_LOG(INFO) << "Invalid ctrl tensor device address"; | |||||
| } | |||||
| } | |||||
| bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const { | |||||
| if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL && | |||||
| AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) { | |||||
| return false; | |||||
| } | |||||
| MS_EXCEPTION_IF_NULL(kernel); | |||||
| const auto &kernel_set = DataDumpParser::GetInstance().kernel_set(); | |||||
| return kernel_set.find(kernel->fullname_with_scope()) != kernel_set.end(); | |||||
| } | |||||
| void DataDumper::UnloadDumpInfo() { | |||||
| if (!load_flag_) { | |||||
| MS_LOG(WARNING) << "Load not success, no need to unload"; | |||||
| return; | |||||
| } | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_); | |||||
| MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << kernel_graph_->graph_id(); | |||||
| aicpu::dump::OpMappingInfo op_mapping_info; | |||||
| op_mapping_info.set_model_id(kernel_graph_->graph_id()); | |||||
| op_mapping_info.set_flag(kAicpuUnloadFlag); | |||||
| for (const auto &kernel_name : dump_kernel_names_) { | |||||
| aicpu::dump::Task task; | |||||
| auto iter = runtime_info_map_.find(kernel_name); | |||||
| if (iter == runtime_info_map_.end()) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; | |||||
| } | |||||
| MS_EXCEPTION_IF_NULL(iter->second); | |||||
| auto task_id = std::get<kTupleTaskId>(*iter->second); | |||||
| task.set_task_id(task_id); | |||||
| MS_EXCEPTION_IF_NULL(op_mapping_info.mutable_task()); | |||||
| op_mapping_info.mutable_task()->Add(std::move(task)); | |||||
| } | |||||
| RtLoadDumpData(op_mapping_info, &dev_unload_mem_); | |||||
| } | |||||
| void DataDumper::ReleaseDevMem(void **ptr) const { | |||||
| if (ptr == nullptr) { | |||||
| return; | |||||
| } | |||||
| if (*ptr != nullptr) { | |||||
| rtError_t rt_error = rtFree(*ptr); | |||||
| if (rt_error != RT_ERROR_NONE) { | |||||
| MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << rt_error; | |||||
| } | |||||
| *ptr = nullptr; | |||||
| } | |||||
| } | |||||
| void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const { | |||||
| dump_task->set_end_graph(false); | |||||
| auto iter = runtime_info_map_.find(kernel->fullname_with_scope()); | |||||
| if (iter == runtime_info_map_.end()) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; | |||||
| } | |||||
| MS_EXCEPTION_IF_NULL(iter->second); | |||||
| auto task_id = std::get<kTupleTaskId>(*iter->second); | |||||
| auto stream_id = std::get<kTupleStreamId>(*iter->second); | |||||
| auto args = std::get<kTupleArgs>(*iter->second); | |||||
| MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id; | |||||
| dump_task->set_task_id(task_id); | |||||
| dump_task->set_stream_id(stream_id); | |||||
| MS_EXCEPTION_IF_NULL(dump_task->mutable_op()); | |||||
| dump_task->mutable_op()->set_op_name(kernel->fullname_with_scope()); | |||||
| dump_task->mutable_op()->set_op_type(AnfAlgo::GetCNodeName(kernel.get())); | |||||
| DumpKernelOutput(kernel, args, dump_task); | |||||
| DumpKernelInput(kernel, args, dump_task); | |||||
| } | |||||
| void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) { | |||||
| std::string proto_str; | |||||
| size_t proto_size = dump_info.ByteSizeLong(); | |||||
| bool ret = dump_info.SerializeToString(&proto_str); | |||||
| if (!ret || proto_size == 0) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size %zu."; | |||||
| } | |||||
| rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed"; | |||||
| } | |||||
| if (ptr == nullptr) { | |||||
| MS_LOG(ERROR) << "[DataDump] rtMalloc failed, ptr is nullptr"; | |||||
| return; | |||||
| } | |||||
| rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed"; | |||||
| } | |||||
| MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start"; | |||||
| rt_ret = rtDatadumpInfoLoad(*ptr, proto_size); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed"; | |||||
| } | |||||
| } | |||||
| void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) { | |||||
| MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope(); | |||||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||||
| auto output_size = AnfAlgo::GetOutputTensorNum(kernel); | |||||
| uint64_t offset = sizeof(void *) * input_size; | |||||
| for (size_t i = 0; i < output_size; ++i) { | |||||
| auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); | |||||
| auto output_format = AnfAlgo::GetOutputFormat(kernel, i); | |||||
| auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i); | |||||
| aicpu::dump::Output output; | |||||
| output.set_data_type(GetGeDataType(data_type)); | |||||
| output.set_format(GetGeFormat(output_format, output_shape.size())); | |||||
| MS_EXCEPTION_IF_NULL(output.mutable_shape()); | |||||
| for (auto dim : output_shape) { | |||||
| output.mutable_shape()->add_dim(dim); | |||||
| } | |||||
| output.set_original_output_format(GetGeFormat(output_format, output_shape.size())); | |||||
| output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset); | |||||
| MS_EXCEPTION_IF_NULL(task->mutable_output()); | |||||
| task->mutable_output()->Add(std::move(output)); | |||||
| offset += sizeof(void *); | |||||
| } | |||||
| } | |||||
| void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) { | |||||
| MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope(); | |||||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||||
| uint64_t offset = 0; | |||||
| for (size_t i = 0; i < input_size; ++i) { | |||||
| aicpu::dump::Input input; | |||||
| auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); | |||||
| auto input_node = input_node_with_index.first; | |||||
| auto input_index = input_node_with_index.second; | |||||
| std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index); | |||||
| auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index); | |||||
| if (output_type == kTypeUnknown) { | |||||
| MS_LOG(WARNING) << "[DataDump] It is not suggested to use a lonely weight parameter as the output of graph"; | |||||
| output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index); | |||||
| } | |||||
| auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index); | |||||
| input.set_data_type(GetGeDataType(output_type)); | |||||
| input.set_format(GetGeFormat(output_format, output_shape.size())); | |||||
| MS_EXCEPTION_IF_NULL(input.mutable_shape()); | |||||
| for (auto dim : output_shape) { | |||||
| input.mutable_shape()->add_dim(dim); | |||||
| } | |||||
| input.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset); | |||||
| MS_EXCEPTION_IF_NULL(task->mutable_input()); | |||||
| task->mutable_input()->Add(std::move(input)); | |||||
| offset += sizeof(void *); | |||||
| } | |||||
| } | |||||
| } // namespace ascend | |||||
| } // namespace device | |||||
| } // namespace mindspore | |||||
| #endif | |||||
| @@ -0,0 +1,69 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ | |||||
| #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ | |||||
| #ifdef ENABLE_DATA_DUMP | |||||
| #include <tuple> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "session/kernel_graph.h" | |||||
| namespace aicpu { | |||||
| namespace dump { | |||||
| class OpMappingInfo; | |||||
| class Task; | |||||
| } // namespace dump | |||||
| } // namespace aicpu | |||||
| namespace mindspore { | |||||
| namespace device { | |||||
| namespace ascend { | |||||
| // tuple(op_name, task_id, stream_id, args) | |||||
| using RuntimeInfo = std::tuple<uint32_t, uint32_t, void *>; | |||||
| class DataDumper { | |||||
| public: | |||||
| DataDumper(const session::KernelGraph *kernel_graph, | |||||
| const std::map<std::string, std::shared_ptr<RuntimeInfo>> &runtime_info_map) | |||||
| : load_flag_(false), | |||||
| dev_load_mem_(nullptr), | |||||
| dev_unload_mem_(nullptr), | |||||
| kernel_graph_(kernel_graph), | |||||
| runtime_info_map_(runtime_info_map) {} | |||||
| ~DataDumper(); | |||||
| void LoadDumpInfo(); | |||||
| void UnloadDumpInfo(); | |||||
| private: | |||||
| void ReleaseDevMem(void **ptr) const; | |||||
| bool KernelNeedDump(const CNodePtr &kernel) const; | |||||
| void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const; | |||||
| void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const; | |||||
| bool load_flag_; | |||||
| void *dev_load_mem_; | |||||
| void *dev_unload_mem_; | |||||
| std::vector<std::string> dump_kernel_names_; | |||||
| const session::KernelGraph *kernel_graph_; | |||||
| std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map_; | |||||
| }; | |||||
| } // namespace ascend | |||||
| } // namespace device | |||||
| } // namespace mindspore | |||||
| #endif | |||||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ | |||||
| @@ -0,0 +1,120 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ | |||||
| #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include "proto/ge_dtype.pb.h" | |||||
| #include "ir/dtype/type_id.h" | |||||
| #include "utils/utils.h" | |||||
| namespace mindspore { | |||||
| namespace device { | |||||
| namespace ascend { | |||||
| static ge::proto::DataType GetGeDataType(TypeId type_id) { | |||||
| static const std::map<TypeId, ge::proto::DataType> data_type_map = { | |||||
| {TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED}, {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT}, | |||||
| {TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16}, {TypeId::kNumberTypeInt8, ge::proto::DT_INT8}, | |||||
| {TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8}, {TypeId::kNumberTypeInt16, ge::proto::DT_INT16}, | |||||
| {TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16}, {TypeId::kNumberTypeInt32, ge::proto::DT_INT32}, | |||||
| {TypeId::kNumberTypeInt64, ge::proto::DT_INT64}, {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32}, | |||||
| {TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64}, {TypeId::kNumberTypeBool, ge::proto::DT_BOOL}, | |||||
| {TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE}, | |||||
| }; | |||||
| MS_LOG(INFO) << "Vm origin type_id:" << type_id; | |||||
| auto iter = data_type_map.find(type_id); | |||||
| if (iter == data_type_map.end()) { | |||||
| MS_LOG(EXCEPTION) << "Invalid data type:" << type_id; | |||||
| } | |||||
| return iter->second; | |||||
| } | |||||
// Tensor format identifiers. The numeric values are written out explicitly because they are stored as
// plain integers in the dump messages (see the `format` fields set from GetGeFormat).
enum GeFormat {
  kFormat_NCHW = 0,       // NCHW
  kFormat_NHWC = 1,       // NHWC
  kFormat_ND = 2,         // Nd Tensor
  kFormat_NC1HWC0 = 3,    // NC1HWC0
  kFormat_FRACTAL_Z = 4,  // FRACTAL_Z
  kFormat_NC1C0HWPAD = 5,
  kFormat_NHWC1C0 = 6,
  kFormat_FSR_NCHW = 7,
  kFormat_FRACTAL_DECONV = 8,
  kFormat_C1HWNC0 = 9,
  kFormat_FRACTAL_DECONV_TRANSPOSE = 10,
  kFormat_FRACTAL_DECONV_SP_STRIDE_TRANS = 11,
  kFormat_NC1HWC0_C04 = 12,    // NC1HWC0, C0 =4
  kFormat_FRACTAL_Z_C04 = 13,  // FRACZ, C0 =4
  kFormat_CHWN = 14,
  kFormat_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15,
  kFormat_HWCN = 16,
  kFormat_NC1KHKWHWC0 = 17,  // KH,KW kernel h& kernel w maxpooling max output format
  kFormat_BN_WEIGHT = 18,
  kFormat_FILTER_HWCK = 19,  // filter input tensor format
  kFormat_HASHTABLE_LOOKUP_LOOKUPS = 20,
  kFormat_HASHTABLE_LOOKUP_KEYS = 21,
  kFormat_HASHTABLE_LOOKUP_VALUE = 22,
  kFormat_HASHTABLE_LOOKUP_OUTPUT = 23,
  kFormat_HASHTABLE_LOOKUP_HITS = 24,
  kFormat_C1HWNCoC0 = 25,
  kFormat_MD = 26,
  kFormat_NDHWC = 27,
  kFormat_FRACTAL_ZZ = 28,
  kFormat_FRACTAL_NZ = 29,
  kFormat_NCDHW = 30,
  kFormat_DHWCN = 31,  // 3D filter input tensor format
  kFormat_NDC1HWC0 = 32,
  kFormat_FRACTAL_Z_3D = 33,
  kFormat_CN = 34,
  kFormat_NC = 35,
  kFormat_DHWNC = 36,
  kFormat_FRACTAL_Z_3D_TRANSPOSE = 37,  // 3D filter(transpose) input tensor format
  kFormat_RESERVED = 38,
  kFormat_ALL = 39
};
| static GeFormat GetGeFormat(const std::string &format, size_t shape_size) { | |||||
| static const std::map<std::string, GeFormat> format_map = { | |||||
| // default format: nchw, fractal_nz? | |||||
| {kOpFormat_DEFAULT, kFormat_NCHW}, | |||||
| {kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0}, | |||||
| {kOpFormat_ND, kFormat_ND}, | |||||
| {kOpFormat_NCHW, kFormat_NCHW}, | |||||
| {kOpFormat_NHWC, kFormat_NHWC}, | |||||
| {kOpFormat_HWCN, kFormat_HWCN}, | |||||
| {kOpFormat_NC1HWC0, kFormat_NC1HWC0}, | |||||
| {kOpFormat_FRAC_Z, kFormat_FRACTAL_Z}, | |||||
| {kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ}, | |||||
| {kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0}, | |||||
| {kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04}, | |||||
| {kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04}, | |||||
| {kOpFormat_NDHWC, kFormat_NDHWC}, | |||||
| }; | |||||
| MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size; | |||||
| if (format == kOpFormat_DEFAULT) { | |||||
| return shape_size == 4 ? kFormat_NCHW : kFormat_ND; | |||||
| } | |||||
| auto iter = format_map.find(format); | |||||
| if (iter == format_map.end()) { | |||||
| MS_LOG(EXCEPTION) << "Invalid format:" << format; | |||||
| } | |||||
| return iter->second; | |||||
| } | |||||
| } // namespace ascend | |||||
| } // namespace device | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ | |||||
| @@ -0,0 +1,49 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| syntax = "proto3"; | |||||
| package ge.proto; | |||||
// Element data types understood by GE.
enum DataType {
  DT_UNDEFINED = 0;        // Used to indicate a DataType field has not been set.
  DT_FLOAT = 1;            // float type
  DT_FLOAT16 = 2;          // fp16 type
  DT_INT8 = 3;             // int8 type
  DT_UINT8 = 4;            // uint8 type
  DT_INT16 = 5;            // int16 type
  DT_UINT16 = 6;           // uint16 type
  DT_INT32 = 7;            // int32 type
  DT_INT64 = 8;            // int64 type
  DT_UINT32 = 9;           // unsigned int32
  DT_UINT64 = 10;          // unsigned int64
  DT_BOOL = 11;            // bool type
  DT_DOUBLE = 12;          // double type
  DT_STRING = 13;          // string type
  DT_DUAL_SUB_INT8 = 14;   // dual output int8 type
  DT_DUAL_SUB_UINT8 = 15;  // dual output uint8 type
  DT_COMPLEX64 = 16;       // complex64 type
  DT_COMPLEX128 = 17;      // complex128 type
  DT_QINT8 = 18;           // qint8 type
  DT_QINT16 = 19;          // qint16 type
  DT_QINT32 = 20;          // qint32 type
  DT_QUINT8 = 21;          // quint8 type
  DT_QUINT16 = 22;         // quint16 type
  DT_RESOURCE = 23;        // resource type
  DT_STRING_REF = 24;      // string_ref type
  DT_DUAL = 25;            // dual output type
}
| @@ -0,0 +1,78 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| syntax = "proto3"; | |||||
| package aicpu.dump; | |||||
// Tensor shape: one entry per dimension.
message Shape {
  repeated uint64 dim = 1;
}
// Description of one kernel output to dump.
message Output {
  int32 data_type = 1;  // numeric data type value
  int32 format = 2;     // numeric tensor format value
  Shape shape = 3;
  uint64 address = 4;   // address value supplied by the producer of this message
  string original_name = 5;
  int32 original_output_index = 6;
  int32 original_output_data_type = 7;
  int32 original_output_format = 8;
  uint64 size = 9;
}
// Description of one kernel input to dump.
message Input {
  int32 data_type = 1;  // numeric data type value
  int32 format = 2;     // numeric tensor format value
  Shape shape = 3;
  uint64 address = 4;   // address value supplied by the producer of this message
  uint64 size = 5;
}
// Identifies the operator a dump task belongs to.
message Op {
  string op_name = 1;
  string op_type = 2;
}
// One dumped task: its runtime ids, the operator it runs and the tensors to dump.
message Task {
  uint32 task_id = 1;
  uint32 stream_id = 2;
  Op op = 3;
  repeated Output output = 4;
  bool end_graph = 5;
  repeated Input input = 6;
}
// Top-level dump message for one model: where to dump, which model, and which tasks.
// Several scalar fields are wrapped in single-member oneofs, so whether they were set can be detected.
message OpMappingInfo {
  string dump_path = 1;

  oneof model_name_param {
    string model_name = 2;
  }

  oneof model_id_param {
    uint32 model_id = 3;
  }

  oneof step_id {
    uint64 step_id_addr = 4;
  }

  oneof iterations_per_loop {
    uint64 iterations_per_loop_addr = 5;
  }

  oneof loop_cond {
    uint64 loop_cond_addr = 6;
  }

  uint32 flag = 7;  // 0x01 load, 0x00 unload
  repeated Task task = 8;
  string dump_step = 9;
}
| @@ -127,6 +127,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i | |||||
| AddressPtrList kernel_outputs; | AddressPtrList kernel_outputs; | ||||
| auto kernel_mod = AnfAlgo::GetKernelMod(anf_node_ptr); | auto kernel_mod = AnfAlgo::GetKernelMod(anf_node_ptr); | ||||
| MS_EXCEPTION_IF_NULL(kernel_mod); | MS_EXCEPTION_IF_NULL(kernel_mod); | ||||
| kernel_mod->set_kernel_name(anf_node_ptr->fullname_with_scope()); | |||||
| if (AnfAlgo::GetCNodeName(anf_node_ptr) != kAtomicAddrCleanOpName) { | if (AnfAlgo::GetCNodeName(anf_node_ptr) != kAtomicAddrCleanOpName) { | ||||
| for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_node_ptr); ++i) { | for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_node_ptr); ++i) { | ||||
| auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i); | auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i); | ||||
| @@ -34,6 +34,7 @@ class CPUKernelRuntime; | |||||
| namespace ascend { | namespace ascend { | ||||
| class AscendKernelRuntime; | class AscendKernelRuntime; | ||||
| class AscendMemoryManager; | class AscendMemoryManager; | ||||
| class DataDumper; | |||||
| namespace tasksink { | namespace tasksink { | ||||
| class TaskGenerator; | class TaskGenerator; | ||||
| } // namespace tasksink | } // namespace tasksink | ||||
| @@ -90,6 +91,7 @@ class DeviceAddress { | |||||
| friend class mindspore::device::gpu::GPUMemoryManager; | friend class mindspore::device::gpu::GPUMemoryManager; | ||||
| friend class mindspore::device::ascend::AscendKernelRuntime; | friend class mindspore::device::ascend::AscendKernelRuntime; | ||||
| friend class mindspore::device::ascend::AscendMemoryManager; | friend class mindspore::device::ascend::AscendMemoryManager; | ||||
| friend class mindspore::device::ascend::DataDumper; | |||||
| }; | }; | ||||
| using DeviceAddressPtr = std::shared_ptr<DeviceAddress>; | using DeviceAddressPtr = std::shared_ptr<DeviceAddress>; | ||||
| @@ -34,6 +34,7 @@ | |||||
| #include "device/ascend/kernel_select_ascend.h" | #include "device/ascend/kernel_select_ascend.h" | ||||
| #include "runtime/base.h" | #include "runtime/base.h" | ||||
| #include "device/ascend/ascend_stream_assign.h" | #include "device/ascend/ascend_stream_assign.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| using device::ascend::ProfilingUtils; | using device::ascend::ProfilingUtils; | ||||
| @@ -117,6 +118,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> | |||||
| std::vector<AnfNodePtr> *mute_inputs = kernel_graph_ptr->MutableInputs(); | std::vector<AnfNodePtr> *mute_inputs = kernel_graph_ptr->MutableInputs(); | ||||
| MS_EXCEPTION_IF_NULL(mute_inputs); | MS_EXCEPTION_IF_NULL(mute_inputs); | ||||
| mute_inputs->push_back(switch_loop_input[kLoopCountParamName]); | mute_inputs->push_back(switch_loop_input[kLoopCountParamName]); | ||||
| mute_inputs->push_back(switch_loop_input[kEpochParamName]); | |||||
| mute_inputs->push_back(switch_loop_input[kIterLoopParamName]); | mute_inputs->push_back(switch_loop_input[kIterLoopParamName]); | ||||
| mute_inputs->push_back(switch_loop_input[kZeroParamName]); | mute_inputs->push_back(switch_loop_input[kZeroParamName]); | ||||
| mute_inputs->push_back(switch_loop_input[kOneParamName]); | mute_inputs->push_back(switch_loop_input[kOneParamName]); | ||||
| @@ -316,6 +318,13 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptr<session::Kerne | |||||
| one->set_abstract(paremeter_abstract_ptr); | one->set_abstract(paremeter_abstract_ptr); | ||||
| ParameterPtr one_new = kernel_graph_ptr->NewParameter(one); | ParameterPtr one_new = kernel_graph_ptr->NewParameter(one); | ||||
| (*switch_loop_input)[kOneParamName] = one_new; | (*switch_loop_input)[kOneParamName] = one_new; | ||||
| ParameterPtr epoch = std::make_shared<Parameter>(kernel_graph_ptr); | |||||
| MS_EXCEPTION_IF_NULL(epoch); | |||||
| epoch->set_name(kEpochParamName); | |||||
| epoch->set_abstract(paremeter_abstract_ptr); | |||||
| ParameterPtr epoch_new = kernel_graph_ptr->NewParameter(epoch); | |||||
| (*switch_loop_input)[kEpochParamName] = epoch_new; | |||||
| } | } | ||||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder KernelAdjust::CreateMngKernelBuilder( | kernel::KernelBuildInfo::KernelBuildInfoBuilder KernelAdjust::CreateMngKernelBuilder( | ||||
| @@ -510,6 +519,14 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) { | |||||
| *val = 0; | *val = 0; | ||||
| inputs->push_back(loop_count_tensor); | inputs->push_back(loop_count_tensor); | ||||
| // Epoch in device | |||||
| tensor::TensorPtr epoch_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp); | |||||
| MS_EXCEPTION_IF_NULL(epoch_tensor); | |||||
| val = static_cast<int32_t *>(epoch_tensor->data_c()); | |||||
| MS_EXCEPTION_IF_NULL(val); | |||||
| *val = 0; | |||||
| inputs->push_back(epoch_tensor); | |||||
| tensor::TensorPtr iter_loop_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp); | tensor::TensorPtr iter_loop_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp); | ||||
| MS_EXCEPTION_IF_NULL(iter_loop_tensor); | MS_EXCEPTION_IF_NULL(iter_loop_tensor); | ||||
| val = static_cast<int32_t *>(iter_loop_tensor->data_c()); | val = static_cast<int32_t *>(iter_loop_tensor->data_c()); | ||||
| @@ -531,6 +548,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) { | |||||
| MS_EXCEPTION_IF_NULL(val); | MS_EXCEPTION_IF_NULL(val); | ||||
| *val = 1; | *val = 1; | ||||
| inputs->push_back(one_tensor); | inputs->push_back(one_tensor); | ||||
| MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; | MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; | ||||
| } | } | ||||
| @@ -37,6 +37,7 @@ constexpr auto kLoopCountParamName = "loop_count"; | |||||
| constexpr auto kIterLoopParamName = "iter_loop"; | constexpr auto kIterLoopParamName = "iter_loop"; | ||||
| constexpr auto kZeroParamName = "zero"; | constexpr auto kZeroParamName = "zero"; | ||||
| constexpr auto kOneParamName = "one"; | constexpr auto kOneParamName = "one"; | ||||
| constexpr auto kEpochParamName = "loop_epoch"; | |||||
| constexpr auto kStreamNeedActivedFirst = "stream_need_active_first"; | constexpr auto kStreamNeedActivedFirst = "stream_need_active_first"; | ||||
| constexpr uint32_t kSecondStreamSwitchLabel = 2; | constexpr uint32_t kSecondStreamSwitchLabel = 2; | ||||
| @@ -26,6 +26,7 @@ | |||||
| #include "kernel/aicpu/aicpu_kernel_build.h" | #include "kernel/aicpu/aicpu_kernel_build.h" | ||||
| #include "utils/convert_utils.h" | #include "utils/convert_utils.h" | ||||
| #include "kernel/aicpu/aicpu_util.h" | #include "kernel/aicpu/aicpu_util.h" | ||||
| #include "utils/context/ms_context.h" | |||||
| using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>; | using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>; | ||||
| @@ -144,8 +145,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> | |||||
| if (node_name_ == kTopK) { | if (node_name_ == kTopK) { | ||||
| node_name_ = kTopKV2; | node_name_ = kTopKV2; | ||||
| } | } | ||||
| AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>( | AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>( | ||||
| stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs); | |||||
| kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump()); | |||||
| MS_LOG(INFO) << "AicpuOpKernelMod GenTask end"; | MS_LOG(INFO) << "AicpuOpKernelMod GenTask end"; | ||||
| return {task_info_ptr}; | return {task_info_ptr}; | ||||
| @@ -26,6 +26,7 @@ | |||||
| #include "runtime/rt.h" | #include "runtime/rt.h" | ||||
| #include "utils/log_adapter.h" | #include "utils/log_adapter.h" | ||||
| #include "utils/convert_utils.h" | #include "utils/convert_utils.h" | ||||
| #include "utils/context/ms_context.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| @@ -123,8 +124,8 @@ std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &in | |||||
| MS_LOG(DEBUG) << "The block_dim is:" << block_dim; | MS_LOG(DEBUG) << "The block_dim is:" << block_dim; | ||||
| TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>( | TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>( | ||||
| stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs, | |||||
| output_data_addrs, workspace_addrs); | |||||
| kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, | |||||
| input_data_addrs, output_data_addrs, workspace_addrs, NeedDump()); | |||||
| return {task_info_ptr}; | return {task_info_ptr}; | ||||
| } | } | ||||
| } // namespace kernel | } // namespace kernel | ||||
| @@ -21,6 +21,9 @@ | |||||
| #include <memory> | #include <memory> | ||||
| #include "framework/ge_runtime/task_info.h" | #include "framework/ge_runtime/task_info.h" | ||||
| #include "kernel/kernel.h" | #include "kernel/kernel.h" | ||||
| #ifdef ENABLE_DATA_DUMP | |||||
| #include "debug/data_dump_parser.h" | |||||
| #endif | |||||
| using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>; | using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>; | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod { | |||||
| const std::vector<AddressPtr> &, uint32_t) = 0; | const std::vector<AddressPtr> &, uint32_t) = 0; | ||||
| uint32_t block_dim() { return block_dim_; } | uint32_t block_dim() { return block_dim_; } | ||||
| uint32_t stream_id() { return stream_id_; } | uint32_t stream_id() { return stream_id_; } | ||||
| virtual bool NeedDump() { | |||||
| #ifdef ENABLE_DATA_DUMP | |||||
| return DataDumpParser::GetInstance().NeedDump(kernel_name_); | |||||
| #else | |||||
| return false; | |||||
| #endif | |||||
| } | |||||
| protected: | protected: | ||||
| uint32_t block_dim_{1}; | uint32_t block_dim_{1}; | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include "device/ascend/tasksink/runtime_utils.h" | #include "device/ascend/tasksink/runtime_utils.h" | ||||
| #include "session/anf_runtime_algorithm.h" | #include "session/anf_runtime_algorithm.h" | ||||
| #include "utils/utils.h" | #include "utils/utils.h" | ||||
| #include "utils/context/ms_context.h" | |||||
| using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>; | using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>; | ||||
| using ge::model_runner::HcclTaskInfo; | using ge::model_runner::HcclTaskInfo; | ||||
| @@ -146,10 +147,12 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu | |||||
| << ", root_id=" << root_id_ << ", op_type=" << static_cast<int>(op_type_) | << ", root_id=" << root_id_ << ", op_type=" << static_cast<int>(op_type_) | ||||
| << ", data_type=" << static_cast<int>(data_type); | << ", data_type=" << static_cast<int>(data_type); | ||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| HcclTaskInfoPtr task_info_ptr = std::make_shared<HcclTaskInfo>( | HcclTaskInfoPtr task_info_ptr = std::make_shared<HcclTaskInfo>( | ||||
| stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, private_def, nullptr, | |||||
| hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, RuntimeUtils::HcomUnbindModel, | |||||
| RuntimeUtils::HcomDistribute); | |||||
| kernel_name_, stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, | |||||
| private_def, nullptr, hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, | |||||
| RuntimeUtils::HcomUnbindModel, RuntimeUtils::HcomDistribute, NeedDump()); | |||||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | MS_EXCEPTION_IF_NULL(task_info_ptr); | ||||
| return {task_info_ptr}; | return {task_info_ptr}; | ||||
| } | } | ||||
| @@ -129,6 +129,10 @@ class KernelMod { | |||||
| virtual std::vector<size_t> GenParameters() { return {}; } | virtual std::vector<size_t> GenParameters() { return {}; } | ||||
| virtual ~KernelMod() = default; | virtual ~KernelMod() = default; | ||||
| void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } | |||||
| protected: | |||||
| std::string kernel_name_; | |||||
| }; | }; | ||||
| using KernelModPtr = std::shared_ptr<KernelMod>; | using KernelModPtr = std::shared_ptr<KernelMod>; | ||||
| } // namespace kernel | } // namespace kernel | ||||
| @@ -58,8 +58,9 @@ std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &in | |||||
| } | } | ||||
| stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>( | |||||
| stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE); | |||||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = | |||||
| std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, | |||||
| inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false); | |||||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | MS_EXCEPTION_IF_NULL(task_info_ptr); | ||||
| return {task_info_ptr}; | return {task_info_ptr}; | ||||
| } | } | ||||
| @@ -55,7 +55,8 @@ std::vector<TaskInfoPtr> LabelGotoKernel::GenTask(const std::vector<AddressPtr> | |||||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | const std::vector<AddressPtr> &, uint32_t stream_id) { | ||||
| MS_LOG(INFO) << "LabelGotoKernel GenTask label:" << label_ << ", stream id:" << stream_id; | MS_LOG(INFO) << "LabelGotoKernel GenTask label:" << label_ << ", stream id:" << stream_id; | ||||
| std::vector<TaskInfoPtr> task_info_list; | std::vector<TaskInfoPtr> task_info_list; | ||||
| std::shared_ptr<LabelGotoTaskInfo> task_info_ptr = std::make_shared<LabelGotoTaskInfo>(stream_id, label_); | |||||
| std::shared_ptr<LabelGotoTaskInfo> task_info_ptr = | |||||
| std::make_shared<LabelGotoTaskInfo>(kernel_name_, stream_id, label_); | |||||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | MS_EXCEPTION_IF_NULL(task_info_ptr); | ||||
| task_info_list.emplace_back(task_info_ptr); | task_info_list.emplace_back(task_info_ptr); | ||||
| return task_info_list; | return task_info_list; | ||||
| @@ -55,7 +55,7 @@ std::vector<TaskInfoPtr> LabelSetKernel::GenTask(const std::vector<AddressPtr> & | |||||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | const std::vector<AddressPtr> &, uint32_t stream_id) { | ||||
| MS_LOG(INFO) << "LabelSetKernel GenTask label:" << label_ << ", stream id:" << stream_id; | MS_LOG(INFO) << "LabelSetKernel GenTask label:" << label_ << ", stream id:" << stream_id; | ||||
| std::vector<TaskInfoPtr> task_info_list; | std::vector<TaskInfoPtr> task_info_list; | ||||
| std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(stream_id, label_); | |||||
| std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(kernel_name_, stream_id, label_); | |||||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | MS_EXCEPTION_IF_NULL(task_info_ptr); | ||||
| task_info_list.emplace_back(task_info_ptr); | task_info_list.emplace_back(task_info_ptr); | ||||
| return task_info_list; | return task_info_list; | ||||
| @@ -67,7 +67,7 @@ std::vector<TaskInfoPtr> LabelSwitchKernel::GenTask(const std::vector<AddressPtr | |||||
| MS_LOG(INFO) << "LabelSwitchKernel GenTask label size:" << label_size_ << ", stream id:" << stream_id; | MS_LOG(INFO) << "LabelSwitchKernel GenTask label size:" << label_size_ << ", stream id:" << stream_id; | ||||
| std::vector<TaskInfoPtr> task_info_list; | std::vector<TaskInfoPtr> task_info_list; | ||||
| cond_ = inputs[0]->addr; | cond_ = inputs[0]->addr; | ||||
| auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(stream_id, label_size_, label_list_, cond_); | |||||
| auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(kernel_name_, stream_id, label_size_, label_list_, cond_); | |||||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | MS_EXCEPTION_IF_NULL(task_info_ptr); | ||||
| task_info_list.emplace_back(task_info_ptr); | task_info_list.emplace_back(task_info_ptr); | ||||
| return task_info_list; | return task_info_list; | ||||
| @@ -23,6 +23,7 @@ | |||||
| #include "common/utils.h" | #include "common/utils.h" | ||||
| #include "session/anf_runtime_algorithm.h" | #include "session/anf_runtime_algorithm.h" | ||||
| #include "common/trans.h" | #include "common/trans.h" | ||||
| #include "utils/context/ms_context.h" | |||||
| using ge::model_runner::MemcpyAsyncTaskInfo; | using ge::model_runner::MemcpyAsyncTaskInfo; | ||||
| using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>; | using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>; | ||||
| @@ -118,8 +119,9 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr | |||||
| } | } | ||||
| stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>( | |||||
| stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE); | |||||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = | |||||
| std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, | |||||
| inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); | |||||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | MS_EXCEPTION_IF_NULL(task_info_ptr); | ||||
| return {task_info_ptr}; | return {task_info_ptr}; | ||||
| } | } | ||||
| @@ -63,7 +63,7 @@ std::vector<TaskInfoPtr> ProfilingKernelMod::GenTask(const std::vector<AddressPt | |||||
| << ", outputs size:" << outputs.size(); | << ", outputs size:" << outputs.size(); | ||||
| stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
| std::shared_ptr<ProfilerTraceTaskInfo> task_info_ptr = | std::shared_ptr<ProfilerTraceTaskInfo> task_info_ptr = | ||||
| std::make_shared<ProfilerTraceTaskInfo>(stream_id, log_id_, notify_, flags_); | |||||
| std::make_shared<ProfilerTraceTaskInfo>(kernel_name_, stream_id, log_id_, notify_, flags_); | |||||
| return {task_info_ptr}; | return {task_info_ptr}; | ||||
| } | } | ||||
| } // namespace kernel | } // namespace kernel | ||||
| @@ -60,7 +60,7 @@ std::vector<TaskInfoPtr> RecvKernel::GenTask(const std::vector<AddressPtr> &, co | |||||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | const std::vector<AddressPtr> &, uint32_t stream_id) { | ||||
| MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id; | MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id; | ||||
| stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
| EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(stream_id, event_id_); | |||||
| EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(kernel_name_, stream_id, event_id_); | |||||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | MS_EXCEPTION_IF_NULL(task_info_ptr); | ||||
| return {task_info_ptr}; | return {task_info_ptr}; | ||||
| } | } | ||||
| @@ -57,7 +57,7 @@ std::vector<TaskInfoPtr> SendKernel::GenTask(const std::vector<AddressPtr> &, co | |||||
| const std::vector<AddressPtr> &, uint32_t stream_id) { | const std::vector<AddressPtr> &, uint32_t stream_id) { | ||||
| MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id; | MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id; | ||||
| stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
| EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(stream_id, event_id_); | |||||
| EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(kernel_name_, stream_id, event_id_); | |||||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | MS_EXCEPTION_IF_NULL(task_info_ptr); | ||||
| return {task_info_ptr}; | return {task_info_ptr}; | ||||
| } | } | ||||
| @@ -72,7 +72,8 @@ std::vector<TaskInfoPtr> StreamActiveKernel::GenTask(const std::vector<AddressPt | |||||
| stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
| std::vector<TaskInfoPtr> task_info_list; | std::vector<TaskInfoPtr> task_info_list; | ||||
| for (auto &index : active_streams_index_) { | for (auto &index : active_streams_index_) { | ||||
| std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = std::make_shared<StreamActiveTaskInfo>(stream_id, index); | |||||
| std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = | |||||
| std::make_shared<StreamActiveTaskInfo>(kernel_name_, stream_id, index); | |||||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | MS_EXCEPTION_IF_NULL(task_info_ptr); | ||||
| task_info_list.emplace_back(task_info_ptr); | task_info_list.emplace_back(task_info_ptr); | ||||
| MS_LOG(INFO) << "StreamActiveKernel GenTask: streamId:" << stream_id << ", Active streamId:" << index; | MS_LOG(INFO) << "StreamActiveKernel GenTask: streamId:" << stream_id << ", Active streamId:" << index; | ||||
| @@ -91,8 +91,8 @@ std::vector<TaskInfoPtr> StreamSwitchKernel::GenTask(const std::vector<AddressPt | |||||
| auto ites_per_loop = inputs[1]->addr; | auto ites_per_loop = inputs[1]->addr; | ||||
| MS_LOG(INFO) << "cond_:" << static_cast<int>(cond_) << ", true_stream_index_:" << true_stream_index_ | MS_LOG(INFO) << "cond_:" << static_cast<int>(cond_) << ", true_stream_index_:" << true_stream_index_ | ||||
| << ", stream_id:" << stream_id; | << ", stream_id:" << stream_id; | ||||
| std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr = | |||||
| std::make_shared<StreamSwitchTaskInfo>(stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); | |||||
| std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr = std::make_shared<StreamSwitchTaskInfo>( | |||||
| kernel_name_, stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); | |||||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | MS_EXCEPTION_IF_NULL(task_info_ptr); | ||||
| return {task_info_ptr}; | return {task_info_ptr}; | ||||
| } | } | ||||
| @@ -17,7 +17,7 @@ | |||||
| #include "kernel/tbe/tbe_kernel_mod.h" | #include "kernel/tbe/tbe_kernel_mod.h" | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "runtime/rt.h" | #include "runtime/rt.h" | ||||
| #include "nlohmann/json.hpp" | |||||
| #include "utils/context/ms_context.h" | |||||
| #include "graphengine/inc/framework/ge_runtime/task_info.h" | #include "graphengine/inc/framework/ge_runtime/task_info.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -99,9 +99,9 @@ std::vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &in | |||||
| MS_LOG(INFO) << "block_dim is:" << block_dim_; | MS_LOG(INFO) << "block_dim is:" << block_dim_; | ||||
| TbeTaskInfoPtr task_info_ptr = | |||||
| make_shared<ge::model_runner::TbeTaskInfo>(stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, | |||||
| meta_data, input_data_addrs, output_data_addrs, workspace_addrs); | |||||
| TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>( | |||||
| kernel_name_, stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, meta_data, input_data_addrs, | |||||
| output_data_addrs, workspace_addrs, NeedDump()); | |||||
| return {task_info_ptr}; | return {task_info_ptr}; | ||||
| } | } | ||||
| @@ -36,7 +36,7 @@ namespace session { | |||||
| using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>; | using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>; | ||||
| class KernelGraph : public FuncGraph { | class KernelGraph : public FuncGraph { | ||||
| public: | public: | ||||
| KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false) { | |||||
| KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false), current_epoch_(0) { | |||||
| inputs_ = std::make_shared<std::vector<AnfNodePtr>>(); | inputs_ = std::make_shared<std::vector<AnfNodePtr>>(); | ||||
| execution_order_ = {}; | execution_order_ = {}; | ||||
| executable_ = true; | executable_ = true; | ||||
| @@ -154,6 +154,8 @@ class KernelGraph : public FuncGraph { | |||||
| AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const; | AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const; | ||||
| void AddFinalOutputKernel(const AnfNodePtr &node); | void AddFinalOutputKernel(const AnfNodePtr &node); | ||||
| bool IsFinalOutputKernel(const AnfNodePtr &node) const; | bool IsFinalOutputKernel(const AnfNodePtr &node) const; | ||||
| uint32_t current_epoch() const { return current_epoch_; } | |||||
| void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; } | |||||
| private: | private: | ||||
| // remove value node form graph | // remove value node form graph | ||||
| @@ -216,6 +218,7 @@ class KernelGraph : public FuncGraph { | |||||
| std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_; | std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_; | ||||
| std::unordered_map<AnfNodePtr, AnfNodePtr> internal_outputs_to_front_map_; | std::unordered_map<AnfNodePtr, AnfNodePtr> internal_outputs_to_front_map_; | ||||
| std::set<AnfNodePtr> final_output_kernels_; | std::set<AnfNodePtr> final_output_kernels_; | ||||
| uint32_t current_epoch_; | |||||
| }; | }; | ||||
| } // namespace session | } // namespace session | ||||
| using KernelGraphPtr = std::shared_ptr<session::KernelGraph>; | using KernelGraphPtr = std::shared_ptr<session::KernelGraph>; | ||||
| @@ -187,6 +187,18 @@ size_t LoadCtrlInputTensor(const std::shared_ptr<KernelGraph> &graph, std::vecto | |||||
| // set loop_count to zero | // set loop_count to zero | ||||
| MS_EXCEPTION_IF_NULL(inputs); | MS_EXCEPTION_IF_NULL(inputs); | ||||
| inputs->push_back(tensor); | inputs->push_back(tensor); | ||||
| auto epoch_tensor = (*inputs_params)[1]; | |||||
| MS_EXCEPTION_IF_NULL(epoch_tensor); | |||||
| auto *epoch_val = static_cast<int32_t *>(epoch_tensor->data_c()); | |||||
| MS_EXCEPTION_IF_NULL(epoch_val); | |||||
| *epoch_val = graph->current_epoch(); | |||||
| epoch_tensor->set_dirty(true); | |||||
| inputs->push_back(epoch_tensor); | |||||
| MS_LOG(INFO) << "Load epoch_val:" << *epoch_val; | |||||
| graph->set_current_epoch(graph->current_epoch() + 1); | |||||
| return inputs_params->size(); | return inputs_params->size(); | ||||
| } | } | ||||
| @@ -814,13 +826,13 @@ void SessionBasic::AddParameterToGraphInputs(const std::vector<AnfNodePtr> &para | |||||
| void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, | void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, | ||||
| const std::vector<tensor::TensorPtr> &inputs_const) const { | const std::vector<tensor::TensorPtr> &inputs_const) const { | ||||
| std::vector<tensor::TensorPtr> inputs(inputs_const); | std::vector<tensor::TensorPtr> inputs(inputs_const); | ||||
| size_t input_ctrl_size = 1; | |||||
| size_t input_ctrl_size = 2; | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | MS_EXCEPTION_IF_NULL(kernel_graph); | ||||
| if (kernel_graph->input_ctrl_tensors()) { | if (kernel_graph->input_ctrl_tensors()) { | ||||
| input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs); | input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs); | ||||
| } | } | ||||
| auto input_nodes = kernel_graph->inputs(); | auto input_nodes = kernel_graph->inputs(); | ||||
| if ((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) { | |||||
| if ((inputs.size() + input_ctrl_size) - 2 != input_nodes.size()) { | |||||
| MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() | MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() | ||||
| << ", input_ctrl_size:" << input_ctrl_size; | << ", input_ctrl_size:" << input_ctrl_size; | ||||
| } | } | ||||
| @@ -32,6 +32,8 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint | |||||
| bool ModelRunner::UnloadModel(uint32_t model_id) { return true; } | bool ModelRunner::UnloadModel(uint32_t model_id) { return true; } | ||||
| bool ModelRunner::LoadModelComplete(uint32_t model_id) { return true; } | |||||
| bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, ge::OutputData *output_data) { | bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, ge::OutputData *output_data) { | ||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -45,6 +47,11 @@ const std::vector<uint32_t> &ModelRunner::GetStreamIdList(uint32_t model_id) con | |||||
| static std::vector<uint32_t> stream_id_list; | static std::vector<uint32_t> stream_id_list; | ||||
| return stream_id_list; | return stream_id_list; | ||||
| } | } | ||||
| const std::map<std::string, std::shared_ptr<RuntimeInfo>> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const { | |||||
| static std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map; | |||||
| return runtime_info_map; | |||||
| } | |||||
| } // namespace model_runner | } // namespace model_runner | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -15,7 +15,6 @@ | |||||
| */ | */ | ||||
| #include "device/ascend/ascend_stream_assign.h" | #include "device/ascend/ascend_stream_assign.h" | ||||
| #include "device/ascend/ascend_label_assign.h" | #include "device/ascend/ascend_label_assign.h" | ||||
| #include "device/ascend/tasksink/task_generator.h" | |||||
| #include "device/kernel_adjust.h" | #include "device/kernel_adjust.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -31,13 +30,6 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr) | |||||
| void AscendStreamAssign::GetWaitStreams(vector<uint32_t> *wait_active_stream_list) { return; } | void AscendStreamAssign::GetWaitStreams(vector<uint32_t> *wait_active_stream_list) { return; } | ||||
| void AscendStreamAssign::GetHcomStreams(std::vector<uint32_t> *streams) { return; } | void AscendStreamAssign::GetHcomStreams(std::vector<uint32_t> *streams) { return; } | ||||
| namespace tasksink { | |||||
| bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list, | |||||
| uint32_t graph_id) { | |||||
| return true; | |||||
| } | |||||
| } // namespace tasksink | |||||
| } // namespace ascend | } // namespace ascend | ||||
| void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; } | void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; } | ||||
| bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return true; } | bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return true; } | ||||
| @@ -0,0 +1,30 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "device/ascend/tasksink/task_generator.h" | |||||
| namespace mindspore { | |||||
| namespace device { | |||||
| namespace ascend { | |||||
| namespace tasksink { | |||||
| bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list, | |||||
| uint32_t graph_id) { | |||||
| return true; | |||||
| } | |||||
| } // namespace tasksink | |||||
| } // namespace ascend | |||||
| } // namespace device | |||||
| } // namespace mindspore | |||||