diff --git a/mindspore/ccsrc/debug/CMakeLists.txt b/mindspore/ccsrc/debug/CMakeLists.txt
index a21688bb3d..78cb7fe684 100644
--- a/mindspore/ccsrc/debug/CMakeLists.txt
+++ b/mindspore/ccsrc/debug/CMakeLists.txt
@@ -13,6 +13,7 @@ if(ENABLE_DUMP_IR)
         "${CMAKE_CURRENT_SOURCE_DIR}/rdr/base_recorder.cc"
         "${CMAKE_CURRENT_SOURCE_DIR}/rdr/graph_exec_order_recorder.cc"
         "${CMAKE_CURRENT_SOURCE_DIR}/rdr/graph_recorder.cc"
+        "${CMAKE_CURRENT_SOURCE_DIR}/rdr/stream_exec_order_recorder.cc"
         "${CMAKE_CURRENT_SOURCE_DIR}/rdr/string_recorder.cc"
         "${CMAKE_CURRENT_SOURCE_DIR}/rdr/recorder_manager.cc"
         "${CMAKE_CURRENT_SOURCE_DIR}/rdr/running_data_recorder.cc"
diff --git a/mindspore/ccsrc/debug/rdr/base_recorder.cc b/mindspore/ccsrc/debug/rdr/base_recorder.cc
index 4c3f22894a..52e97a11d0 100644
--- a/mindspore/ccsrc/debug/rdr/base_recorder.cc
+++ b/mindspore/ccsrc/debug/rdr/base_recorder.cc
@@ -37,9 +37,18 @@ void BaseRecorder::SetFilename(const std::string &filename) {
   }
 }
 
-std::optional<std::string> BaseRecorder::GetFileRealPath() {
-  if (filename_.empty()) {
-    filename_ = module_ + "_" + tag_ + "_" + timestamp_;
-  }
-  std::string file_path = directory_ + filename_;
+std::optional<std::string> BaseRecorder::GetFileRealPath(const std::string &suffix) {
+  // Compose the name in a local and leave filename_ untouched: appending `suffix`
+  // to the member would make the file name grow by one more suffix on every call.
+  std::string filename = filename_;
+  if (filename.empty()) {
+    filename = module_ + "_" + tag_;
+    if (!suffix.empty()) {
+      filename += "_" + suffix;
+    }
+    filename += "_" + timestamp_;
+  } else if (!suffix.empty()) {
+    filename += "_" + suffix;
+  }
+  std::string file_path = directory_ + filename;
   auto realpath = Common::GetRealPath(file_path);
diff --git a/mindspore/ccsrc/debug/rdr/base_recorder.h b/mindspore/ccsrc/debug/rdr/base_recorder.h
index fd9f5faeeb..ef3e54596a 100644
--- a/mindspore/ccsrc/debug/rdr/base_recorder.h
+++ b/mindspore/ccsrc/debug/rdr/base_recorder.h
@@ -62,7 +62,7 @@ class BaseRecorder {
   std::string GetModule() const { return module_; }
   std::string GetTag() const { return tag_; }
   std::string GetTimeStamp() const { return timestamp_; }
-  std::optional<std::string> GetFileRealPath();
+  std::optional<std::string> GetFileRealPath(const std::string &suffix = "");
 
   void SetDirectory(const std::string &directory);
   void SetFilename(const std::string &filename);
diff --git a/mindspore/ccsrc/debug/rdr/running_data_recorder.cc b/mindspore/ccsrc/debug/rdr/running_data_recorder.cc
index ab37f97227..472a8c4a12 100644
--- a/mindspore/ccsrc/debug/rdr/running_data_recorder.cc
+++ b/mindspore/ccsrc/debug/rdr/running_data_recorder.cc
@@ -19,6 +19,7 @@
 #include "debug/rdr/graph_exec_order_recorder.h"
 #include "debug/rdr/recorder_manager.h"
 #include "debug/rdr/string_recorder.h"
+#include "debug/rdr/stream_exec_order_recorder.h"
 #include "mindspore/core/ir/func_graph.h"
 #include "mindspore/core/ir/anf.h"
 
@@ -84,6 +85,16 @@ bool RecordString(SubModuleId module, const std::string &tag, const std::string
   return ans;
 }
 
+// Wrap `exec_order` of graph `graph_id` in a StreamExecOrderRecorder and hand it to the RecorderManager.
+bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id,
+                           const std::vector<CNodePtr> &exec_order) {
+  std::string submodule_name = std::string(GetSubModuleName(module));
+  StreamExecOrderRecorderPtr stream_exec_order_recorder =
+    std::make_shared<StreamExecOrderRecorder>(submodule_name, tag, graph_id, exec_order);
+  bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(stream_exec_order_recorder));
+  return ans;
+}
+
 void TriggerAll() { mindspore::RecorderManager::Instance().TriggerAll(); }
 
 #else
@@ -119,6 +130,18 @@ bool RecordString(SubModuleId module, const std::string &tag, const std::string
   return false;
 }
 
+// Fallback in the #else branch: records nothing, logs the warning once, and always returns false.
+bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id,
+                           const std::vector<CNodePtr> &exec_order) {
+  static bool already_printed = false;
+  if (already_printed) {
+    return false;
+  }
+  already_printed = true;
+  MS_LOG(WARNING) << "The RDR presently only support linux os.";
+  return false;
+}
+
 void TriggerAll() {
   static bool already_printed = false;
   if (already_printed) {
diff --git a/mindspore/ccsrc/debug/rdr/running_data_recorder.h b/mindspore/ccsrc/debug/rdr/running_data_recorder.h
index 483ec753ae..6ec032a1ae 100644
--- a/mindspore/ccsrc/debug/rdr/running_data_recorder.h
+++
b/mindspore/ccsrc/debug/rdr/running_data_recorder.h
@@ -33,6 +33,9 @@ bool RecordGraphExecOrder(const SubModuleId module, const std::string &tag,
                           const std::vector<CNodePtr> &&final_exec_order);
 bool RecordString(SubModuleId module, const std::string &tag, const std::string &data,
                   const std::string &filename = "");
+// Record the execution order of graph `graph_id` together with per-node stream information.
+bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id,
+                           const std::vector<CNodePtr> &exec_order);
 void TriggerAll();
 }  // namespace RDR
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.cc b/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.cc
new file mode 100644
index 0000000000..448618d879
--- /dev/null
+++ b/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.cc
@@ -0,0 +1,81 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "debug/rdr/stream_exec_order_recorder.h"
+#include <fstream>
+#include <string>
+#include <vector>
+#include "mindspore/core/ir/anf.h"
+#include "mindspore/core/utils/log_adapter.h"
+#include "backend/session/anf_runtime_algorithm.h"
+#include "utils/utils.h"
+
+namespace mindspore {
+// Join the ids into a comma-separated string; an empty vector yields "".
+std::string Vector2String(const std::vector<uint32_t> &v) {
+  std::string str;
+  for (size_t j = 0; j < v.size(); ++j) {
+    if (j > 0) {
+      str += ",";
+    }
+    str += std::to_string(v[j]);
+  }
+  return str;
+}
+
+// Serialize this node as a JSON object. The label-id and active-stream-id
+// lists are written as comma-separated strings and only when non-empty.
+json ExecNode::ExecNode2Json() {
+  json exec_node;
+  exec_node[kAttrIndex] = index_;
+  exec_node[kAttrNodeName] = node_name_;
+  exec_node[kAttrLogicId] = logic_id_;
+  exec_node[kAttrStreamId] = stream_id_;
+  exec_node[kAttrNodeInfo] = node_info_;
+  exec_node[kAttrEventId] = event_id_;
+  if (!label_ids_.empty()) {
+    exec_node[kAttrLabelId] = Vector2String(label_ids_);
+  }
+  if (!active_stream_ids_.empty()) {
+    exec_node[kAttrActiveStreamId] = Vector2String(active_stream_ids_);
+  }
+
+  return exec_node;
+}
+
+// Dump all recorded nodes as one JSON array into "<real path>.json"; the graph
+// id is passed to GetFileRealPath() as the file-name suffix.
+void StreamExecOrderRecorder::Export() {
+  auto realpath = GetFileRealPath(std::to_string(graph_id_));
+  if (!realpath.has_value()) {
+    return;
+  }
+  std::string real_file_path = realpath.value() + ".json";
+  json exec_order_json = json::array();
+  for (const auto &exec_node : exec_order_) {
+    exec_order_json.push_back(exec_node->ExecNode2Json());
+  }
+  // Mode is set to S_IRWXU for the write and to S_IRUSR once the dump is done.
+  ChangeFileMode(real_file_path, S_IRWXU);
+  std::ofstream fout(real_file_path, std::ofstream::app);
+  if (!fout.is_open()) {
+    MS_LOG(WARNING) << "Open file for saving stream execute order failed. File path: '" << real_file_path << "'.";
+    return;
+  }
+  fout << exec_order_json.dump(2);
+  fout.close();
+  ChangeFileMode(real_file_path, S_IRUSR);
+}
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.h b/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.h
new file mode 100644
index 0000000000..8cfe0eb6e2
--- /dev/null
+++ b/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.h
@@ -0,0 +1,110 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_DEBUG_RDR_STREAM_EXEC_ORDER_RECORDER_H_
+#define MINDSPORE_CCSRC_DEBUG_RDR_STREAM_EXEC_ORDER_RECORDER_H_
+#include <vector>
+#include <string>
+#include <memory>
+#include <utility>
+#include "nlohmann/json.hpp"
+#include "backend/session/anf_runtime_algorithm.h"
+#include "debug/rdr/base_recorder.h"
+
+using json = nlohmann::json;
+
+constexpr auto kAttrNodeName = "node_name";
+constexpr auto kAttrLogicId = "logic_id";
+constexpr auto kAttrNodeInfo = "node_info";
+constexpr auto kAttrLabelId = "label_id";
+constexpr auto kAttrActiveStreamId = "active_stream_id";
+
+namespace mindspore {
+// One entry of a recorded execution order; ExecNode2Json() turns it into a JSON object.
+class ExecNode {
+ public:
+  ExecNode() = default;
+  ExecNode(const size_t index, const std::string &node_name, const uint32_t &logic_id, const uint32_t &stream_id,
+           const std::string &node_info)
+      : index_(index), node_name_(node_name), logic_id_(logic_id), stream_id_(stream_id), node_info_(node_info) {}
+  void SetEventId(const uint32_t &event_id) { event_id_ = event_id; }
+  void SetLabelId(const uint32_t &label_id) { label_ids_.push_back(label_id); }  // appends; may be called repeatedly
+  void SetActiveStreamId(const uint32_t &active_stream_id) { active_stream_ids_.push_back(active_stream_id); }
+  json ExecNode2Json();
+
+ private:
+  size_t index_{0};
+  std::string node_name_;
+  uint32_t logic_id_{0};
+  uint32_t stream_id_{0};
+  std::string node_info_;
+  uint32_t event_id_{0};  // always serialized, so it must hold a defined value even if SetEventId() is never called
+  std::vector<uint32_t> label_ids_;
+  std::vector<uint32_t> active_stream_ids_;
+};
+using ExecNodePtr = std::shared_ptr<ExecNode>;
+
+class CNode;
+using CNodePtr = std::shared_ptr<CNode>;
+// Recorder that converts each CNode of an execution order into an ExecNode at construction time.
+class StreamExecOrderRecorder : public BaseRecorder {
+ public:
+  StreamExecOrderRecorder() : BaseRecorder() {}
+  StreamExecOrderRecorder(const std::string &module, const std::string &tag, const int &graph_id,
+                          const std::vector<CNodePtr> &exec_order)
+      : BaseRecorder(module, tag), graph_id_(graph_id) {
+    // Extract information from execute order.
+    for (size_t i = 0; i < exec_order.size(); i++) {
+      CNodePtr cur_cnode_ptr = exec_order[i];
+      MS_EXCEPTION_IF_NULL(cur_cnode_ptr);
+
+      ExecNode exec_node =
+        ExecNode(i, cur_cnode_ptr->fullname_with_scope(), AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()),
+                 AnfAlgo::GetStreamId(cur_cnode_ptr), cur_cnode_ptr->DebugString());
+
+      if (AnfAlgo::HasNodeAttr(kAttrEventId, cur_cnode_ptr)) {
+        exec_node.SetEventId(AnfAlgo::GetNodeAttr<uint32_t>(cur_cnode_ptr, kAttrEventId));
+      }
+
+      if (AnfAlgo::HasNodeAttr(kAttrLabelIndex, cur_cnode_ptr)) {
+        exec_node.SetLabelId(AnfAlgo::GetNodeAttr<uint32_t>(cur_cnode_ptr, kAttrLabelIndex));
+      }
+
+      if (AnfAlgo::HasNodeAttr(kAttrLabelSwitchList, cur_cnode_ptr)) {
+        auto label_list = AnfAlgo::GetNodeAttr<std::vector<uint32_t>>(cur_cnode_ptr, kAttrLabelSwitchList);
+        for (size_t j = 0; j < label_list.size(); ++j) {
+          exec_node.SetLabelId(label_list[j]);
+        }
+      }
+
+      if (AnfAlgo::HasNodeAttr(kAttrActiveStreamList, cur_cnode_ptr)) {
+        auto stream_list = AnfAlgo::GetNodeAttr<std::vector<uint32_t>>(cur_cnode_ptr, kAttrActiveStreamList);
+        for (size_t j = 0; j < stream_list.size(); ++j) {
+          exec_node.SetActiveStreamId(stream_list[j]);
+        }
+      }
+      exec_order_.push_back(std::make_shared<ExecNode>(std::move(exec_node)));
+    }
+  }
+  void SetModule(const std::string &module) { module_ = module; }
+  virtual void Export();
+
+ private:
+  std::vector<ExecNodePtr> exec_order_;
+  int graph_id_{0};
+};
+using StreamExecOrderRecorderPtr = std::shared_ptr<StreamExecOrderRecorder>;
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_DEBUG_RDR_STREAM_EXEC_ORDER_RECORDER_H_
diff --git a/mindspore/ccsrc/debug/rdr/string_recorder.cc b/mindspore/ccsrc/debug/rdr/string_recorder.cc
index 9c8094adf5..983d952e05 100644
---
a/mindspore/ccsrc/debug/rdr/string_recorder.cc
+++ b/mindspore/ccsrc/debug/rdr/string_recorder.cc
@@ -30,7 +30,7 @@ void StringRecorder::Export() {
   ChangeFileMode(file_path, S_IRWXU);
   std::ofstream fout(file_path, std::ofstream::app);
   if (!fout.is_open()) {
-    MS_LOG(WARNING) << "Open file for saving string failed.";
+    MS_LOG(WARNING) << "Open file for saving string failed. File path: '" << file_path << "'.";
     return;
   }
   fout << data_;
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
index cbaa77a066..fd1a18ff97 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
@@ -28,6 +28,10 @@
 #include "backend/kernel_compiler/oplib/oplib.h"
 #include "utils/utils.h"
 
+#ifdef ENABLE_DUMP_IR
+#include "debug/rdr/running_data_recorder.h"
+#endif
+
 namespace mindspore {
 namespace device {
 namespace ascend {
@@ -108,6 +112,12 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr)
   GetNeedActiveStreams(graph_ptr);
   CheckResourceAssign(graph_ptr);
   MS_LOG(INFO) << "After finish stream assign";
+#ifdef ENABLE_DUMP_IR
+  // Record the execution order as it stands after the resource check; the result is deliberately ignored.
+  const std::vector<CNodePtr> &exec_order = graph_ptr->execution_order();
+  (void)mindspore::RDR::RecordStreamExecOrder(SubModuleId::SM_SESSION, "assign_stream", graph_ptr->graph_id(),
+                                              exec_order);
+#endif
   graph_ptr->PrintGraphExecuteOrder();
 
   FindStreamRelations(graph_ptr);