From 7fe901491a5287cf1bad93b8a5364e7fe56de064 Mon Sep 17 00:00:00 2001 From: luopengting Date: Fri, 19 Feb 2021 18:06:08 +0800 Subject: [PATCH] support recording stream execute order 1. add ExecNode and StreamExecOrderRecorder 2. add json format and json dump for stream exec order 3. add stream exec order api 4. add in CMakeLists --- mindspore/ccsrc/debug/CMakeLists.txt | 1 + mindspore/ccsrc/debug/rdr/base_recorder.cc | 10 +- mindspore/ccsrc/debug/rdr/base_recorder.h | 2 +- .../ccsrc/debug/rdr/running_data_recorder.cc | 21 ++++ .../ccsrc/debug/rdr/running_data_recorder.h | 2 + .../debug/rdr/stream_exec_order_recorder.cc | 72 ++++++++++++ .../debug/rdr/stream_exec_order_recorder.h | 110 ++++++++++++++++++ mindspore/ccsrc/debug/rdr/string_recorder.cc | 2 +- .../device/ascend/ascend_stream_assign.cc | 10 ++ 9 files changed, 226 insertions(+), 4 deletions(-) create mode 100644 mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.cc create mode 100644 mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.h diff --git a/mindspore/ccsrc/debug/CMakeLists.txt b/mindspore/ccsrc/debug/CMakeLists.txt index a21688bb3d..78cb7fe684 100644 --- a/mindspore/ccsrc/debug/CMakeLists.txt +++ b/mindspore/ccsrc/debug/CMakeLists.txt @@ -13,6 +13,7 @@ if(ENABLE_DUMP_IR) "${CMAKE_CURRENT_SOURCE_DIR}/rdr/base_recorder.cc" "${CMAKE_CURRENT_SOURCE_DIR}/rdr/graph_exec_order_recorder.cc" "${CMAKE_CURRENT_SOURCE_DIR}/rdr/graph_recorder.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/rdr/stream_exec_order_recorder.cc" "${CMAKE_CURRENT_SOURCE_DIR}/rdr/string_recorder.cc" "${CMAKE_CURRENT_SOURCE_DIR}/rdr/recorder_manager.cc" "${CMAKE_CURRENT_SOURCE_DIR}/rdr/running_data_recorder.cc" diff --git a/mindspore/ccsrc/debug/rdr/base_recorder.cc b/mindspore/ccsrc/debug/rdr/base_recorder.cc index 4c3f22894a..52e97a11d0 100644 --- a/mindspore/ccsrc/debug/rdr/base_recorder.cc +++ b/mindspore/ccsrc/debug/rdr/base_recorder.cc @@ -37,9 +37,15 @@ void BaseRecorder::SetFilename(const std::string &filename) { } } -std::optional 
BaseRecorder::GetFileRealPath() { +std::optional<std::string> BaseRecorder::GetFileRealPath(const std::string &suffix) { if (filename_.empty()) { - filename_ = module_ + "_" + tag_ + "_" + timestamp_; + filename_ = module_ + "_" + tag_; + if (!suffix.empty()) { + filename_ += "_" + suffix; + } + filename_ += "_" + timestamp_; + } else if (!suffix.empty()) { + filename_ += "_" + suffix; } std::string file_path = directory_ + filename_; auto realpath = Common::GetRealPath(file_path); diff --git a/mindspore/ccsrc/debug/rdr/base_recorder.h b/mindspore/ccsrc/debug/rdr/base_recorder.h index fd9f5faeeb..ef3e54596a 100644 --- a/mindspore/ccsrc/debug/rdr/base_recorder.h +++ b/mindspore/ccsrc/debug/rdr/base_recorder.h @@ -62,7 +62,7 @@ class BaseRecorder { std::string GetModule() const { return module_; } std::string GetTag() const { return tag_; } std::string GetTimeStamp() const { return timestamp_; } - std::optional<std::string> GetFileRealPath(); + std::optional<std::string> GetFileRealPath(const std::string &suffix = ""); void SetDirectory(const std::string &directory); void SetFilename(const std::string &filename); diff --git a/mindspore/ccsrc/debug/rdr/running_data_recorder.cc b/mindspore/ccsrc/debug/rdr/running_data_recorder.cc index ab37f97227..472a8c4a12 100644 --- a/mindspore/ccsrc/debug/rdr/running_data_recorder.cc +++ b/mindspore/ccsrc/debug/rdr/running_data_recorder.cc @@ -19,6 +19,7 @@ #include "debug/rdr/graph_exec_order_recorder.h" #include "debug/rdr/recorder_manager.h" #include "debug/rdr/string_recorder.h" +#include "debug/rdr/stream_exec_order_recorder.h" #include "mindspore/core/ir/func_graph.h" #include "mindspore/core/ir/anf.h" @@ -84,6 +85,15 @@ bool RecordString(SubModuleId module, const std::string &tag, const std::string return ans; } +bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id, + const std::vector<CNodePtr> &exec_order) { + std::string submodule_name = std::string(GetSubModuleName(module)); + StreamExecOrderRecorderPtr 
stream_exec_order_recorder = + std::make_shared<StreamExecOrderRecorder>(submodule_name, tag, graph_id, exec_order); + bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(stream_exec_order_recorder)); + return ans; +} + void TriggerAll() { mindspore::RecorderManager::Instance().TriggerAll(); } #else @@ -119,6 +129,17 @@ bool RecordString(SubModuleId module, const std::string &tag, const std::string return false; } +bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id, + const std::vector<CNodePtr> &exec_order) { + static bool already_printed = false; + if (already_printed) { + return false; + } + already_printed = true; // Warn only on the first call. + MS_LOG(WARNING) << "The RDR presently only support linux os."; + return false; +} + void TriggerAll() { static bool already_printed = false; if (already_printed) { diff --git a/mindspore/ccsrc/debug/rdr/running_data_recorder.h b/mindspore/ccsrc/debug/rdr/running_data_recorder.h index 483ec753ae..6ec032a1ae 100644 --- a/mindspore/ccsrc/debug/rdr/running_data_recorder.h +++ b/mindspore/ccsrc/debug/rdr/running_data_recorder.h @@ -33,6 +33,8 @@ bool RecordGraphExecOrder(const SubModuleId module, const std::string &tag, const std::vector<CNodePtr> &&final_exec_order); bool RecordString(SubModuleId module, const std::string &tag, const std::string &data, const std::string &filename = ""); +bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id, + const std::vector<CNodePtr> &exec_order); void TriggerAll(); } // namespace RDR } // namespace mindspore diff --git a/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.cc b/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.cc new file mode 100644 index 0000000000..448618d879 --- /dev/null +++ b/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.cc @@ -0,0 +1,72 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "debug/rdr/stream_exec_order_recorder.h" +#include <fstream> +#include <string> +#include "mindspore/core/ir/anf.h" +#include "mindspore/core/utils/log_adapter.h" +#include "backend/session/anf_runtime_algorithm.h" +#include "utils/utils.h" + +namespace mindspore { +std::string Vector2String(const std::vector<uint32_t> &v) { + std::string str = ""; + for (size_t j = 0; j < v.size(); ++j) { + str += std::to_string(v[j]) + (j + 1 < v.size() ? "," : ""); + } + return str; +} + +json ExecNode::ExecNode2Json() { + json exec_node; + exec_node[kAttrIndex] = index_; + exec_node[kAttrNodeName] = node_name_; + exec_node[kAttrLogicId] = logic_id_; + exec_node[kAttrStreamId] = stream_id_; + exec_node[kAttrNodeInfo] = node_info_; + exec_node[kAttrEventId] = event_id_; + if (!label_ids_.empty()) { + exec_node[kAttrLabelId] = Vector2String(label_ids_); + } + if (!active_stream_ids_.empty()) { + exec_node[kAttrActiveStreamId] = Vector2String(active_stream_ids_); + } + + return exec_node; +} + +void StreamExecOrderRecorder::Export() { + std::string file_suffix = std::to_string(graph_id_); + auto realpath = GetFileRealPath(file_suffix); + if (!realpath.has_value()) { + return; + } + std::string real_file_path = realpath.value() + ".json"; + json exec_order_json = json::array(); + for (size_t i = 0; i < exec_order_.size(); ++i) { + exec_order_json.push_back(exec_order_[i]->ExecNode2Json()); + } + ChangeFileMode(real_file_path, S_IRWXU); + std::ofstream fout(real_file_path, std::ofstream::trunc); // Overwrite: appending a second array would make the file invalid JSON. + if (!fout.is_open()) { + MS_LOG(WARNING) << "Open file for saving stream 
execute order failed. File path: '" << real_file_path << "'."; + return; + } + fout << exec_order_json.dump(2); + fout.close(); + ChangeFileMode(real_file_path, S_IRUSR); +} +} // namespace mindspore diff --git a/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.h b/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.h new file mode 100644 index 0000000000..8cfe0eb6e2 --- /dev/null +++ b/mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.h @@ -0,0 +1,110 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_DEBUG_RDR_STREAM_EXEC_ORDER_RECORDER_H_ +#define MINDSPORE_CCSRC_DEBUG_RDR_STREAM_EXEC_ORDER_RECORDER_H_ +#include <vector> +#include <string> +#include <memory> +#include <utility> +#include "nlohmann/json.hpp" +#include "backend/session/anf_runtime_algorithm.h" +#include "debug/rdr/base_recorder.h" + +using json = nlohmann::json; + +constexpr auto kAttrNodeName = "node_name"; +constexpr auto kAttrLogicId = "logic_id"; +constexpr auto kAttrNodeInfo = "node_info"; +constexpr auto kAttrLabelId = "label_id"; +constexpr auto kAttrActiveStreamId = "active_stream_id"; + +namespace mindspore { +class ExecNode { + public: + ExecNode() {} + ExecNode(const size_t index, const std::string &node_name, const uint32_t &logic_id, const uint32_t &stream_id, + const std::string &node_info) + : index_(index), node_name_(node_name), logic_id_(logic_id), stream_id_(stream_id), node_info_(node_info) {} + void SetEventId(const uint32_t &event_id) { event_id_ = event_id; } + void SetLabelId(const uint32_t &label_id) { label_ids_.push_back(label_id); } + void SetActiveStreamId(const uint32_t &active_stream_id) { active_stream_ids_.push_back(active_stream_id); } + json ExecNode2Json(); + + private: + size_t index_{0}; + std::string node_name_; + uint32_t logic_id_{0}; + uint32_t stream_id_{0}; + std::string node_info_; + uint32_t event_id_{0}; // Stays 0 unless SetEventId() is called; ExecNode2Json() always serialises it. + std::vector<uint32_t> label_ids_; + std::vector<uint32_t> active_stream_ids_; +}; +using ExecNodePtr = std::shared_ptr<ExecNode>; + +class CNode; +using CNodePtr = std::shared_ptr<CNode>; +class StreamExecOrderRecorder : public BaseRecorder { + public: + StreamExecOrderRecorder() : BaseRecorder() {} + StreamExecOrderRecorder(const std::string &module, const std::string &tag, const int &graph_id, + const std::vector<CNodePtr> &exec_order) + : BaseRecorder(module, tag), graph_id_(graph_id) { + // Extract information from execute order. 
+ for (size_t i = 0; i < exec_order.size(); i++) { + CNodePtr cur_cnode_ptr = exec_order[i]; + MS_EXCEPTION_IF_NULL(cur_cnode_ptr); + + ExecNode exec_node = + ExecNode(i, cur_cnode_ptr->fullname_with_scope(), AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()), + AnfAlgo::GetStreamId(cur_cnode_ptr), cur_cnode_ptr->DebugString()); + + if (AnfAlgo::HasNodeAttr(kAttrEventId, cur_cnode_ptr)) { + exec_node.SetEventId(AnfAlgo::GetNodeAttr<uint32_t>(cur_cnode_ptr, kAttrEventId)); + } + + if (AnfAlgo::HasNodeAttr(kAttrLabelIndex, cur_cnode_ptr)) { + exec_node.SetLabelId(AnfAlgo::GetNodeAttr<uint32_t>(cur_cnode_ptr, kAttrLabelIndex)); + } + + if (AnfAlgo::HasNodeAttr(kAttrLabelSwitchList, cur_cnode_ptr)) { + auto label_list = AnfAlgo::GetNodeAttr<std::vector<uint32_t>>(cur_cnode_ptr, kAttrLabelSwitchList); + for (size_t j = 0; j < label_list.size(); ++j) { + exec_node.SetLabelId(label_list[j]); + } + } + + std::string active_stream_str; + if (AnfAlgo::HasNodeAttr(kAttrActiveStreamList, cur_cnode_ptr)) { + auto stream_list = AnfAlgo::GetNodeAttr<std::vector<uint32_t>>(cur_cnode_ptr, kAttrActiveStreamList); + for (size_t j = 0; j < stream_list.size(); ++j) { + exec_node.SetActiveStreamId(stream_list[j]); + } + } + ExecNodePtr exec_node_ptr = std::make_shared<ExecNode>(exec_node); + exec_order_.push_back(std::move(exec_node_ptr)); + } + } + void SetModule(const std::string &module) { module_ = module; } + virtual void Export(); + + private: + std::vector<ExecNodePtr> exec_order_; + int graph_id_{0}; +}; +using StreamExecOrderRecorderPtr = std::shared_ptr<StreamExecOrderRecorder>; +} // namespace mindspore +#endif // MINDSPORE_CCSRC_DEBUG_RDR_STREAM_EXEC_ORDER_RECORDER_H_ diff --git a/mindspore/ccsrc/debug/rdr/string_recorder.cc b/mindspore/ccsrc/debug/rdr/string_recorder.cc index 9c8094adf5..983d952e05 100644 --- a/mindspore/ccsrc/debug/rdr/string_recorder.cc +++ b/mindspore/ccsrc/debug/rdr/string_recorder.cc @@ -30,7 +30,7 @@ void StringRecorder::Export() { ChangeFileMode(file_path, S_IRWXU); std::ofstream fout(file_path, std::ofstream::app); if (!fout.is_open()) { - 
MS_LOG(WARNING) << "Open file for saving string failed."; + MS_LOG(WARNING) << "Open file for saving string failed. File path: '" << file_path << "'."; return; } fout << data_; diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc index cbaa77a066..fd1a18ff97 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc @@ -28,6 +28,10 @@ #include "backend/kernel_compiler/oplib/oplib.h" #include "utils/utils.h" +#ifdef ENABLE_DUMP_IR +#include "debug/rdr/running_data_recorder.h" +#endif + namespace mindspore { namespace device { namespace ascend { @@ -108,6 +112,12 @@ void AscendStreamAssign::AssignStream(const NotNull &graph_ptr) GetNeedActiveStreams(graph_ptr); CheckResourceAssign(graph_ptr); MS_LOG(INFO) << "After finish stream assign"; +#ifdef ENABLE_DUMP_IR + SubModuleId module = SubModuleId::SM_SESSION; + std::string tag = "assign_stream"; + const std::vector &exec_order = graph_ptr->execution_order(); + mindspore::RDR::RecordStreamExecOrder(module, tag, graph_ptr->graph_id(), exec_order); +#endif graph_ptr->PrintGraphExecuteOrder(); FindStreamRelations(graph_ptr);