Browse Source

support recording stream execute order

1. add ExecNode and StreamExecOrderRecorder
2. add json format and json dump for stream exec order
3. add stream exec order api
4. add in CMakeLists
tags/v1.2.0-rc1
luopengting 4 years ago
parent
commit
7fe901491a
9 changed files with 226 additions and 4 deletions
  1. +1
    -0
      mindspore/ccsrc/debug/CMakeLists.txt
  2. +8
    -2
      mindspore/ccsrc/debug/rdr/base_recorder.cc
  3. +1
    -1
      mindspore/ccsrc/debug/rdr/base_recorder.h
  4. +21
    -0
      mindspore/ccsrc/debug/rdr/running_data_recorder.cc
  5. +2
    -0
      mindspore/ccsrc/debug/rdr/running_data_recorder.h
  6. +72
    -0
      mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.cc
  7. +110
    -0
      mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.h
  8. +1
    -1
      mindspore/ccsrc/debug/rdr/string_recorder.cc
  9. +10
    -0
      mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc

+ 1
- 0
mindspore/ccsrc/debug/CMakeLists.txt View File

@@ -13,6 +13,7 @@ if(ENABLE_DUMP_IR)
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/base_recorder.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/graph_exec_order_recorder.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/graph_recorder.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/stream_exec_order_recorder.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/string_recorder.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/recorder_manager.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/rdr/running_data_recorder.cc"


+ 8
- 2
mindspore/ccsrc/debug/rdr/base_recorder.cc View File

@@ -37,9 +37,15 @@ void BaseRecorder::SetFilename(const std::string &filename) {
}
}

std::optional<std::string> BaseRecorder::GetFileRealPath() {
std::optional<std::string> BaseRecorder::GetFileRealPath(const std::string &suffix) {
if (filename_.empty()) {
filename_ = module_ + "_" + tag_ + "_" + timestamp_;
filename_ = module_ + "_" + tag_;
if (!suffix.empty()) {
filename_ += "_" + suffix;
}
filename_ += "_" + timestamp_;
} else if (!suffix.empty()) {
filename_ += "_" + suffix;
}
std::string file_path = directory_ + filename_;
auto realpath = Common::GetRealPath(file_path);


+ 1
- 1
mindspore/ccsrc/debug/rdr/base_recorder.h View File

@@ -62,7 +62,7 @@ class BaseRecorder {
std::string GetModule() const { return module_; }
std::string GetTag() const { return tag_; }
std::string GetTimeStamp() const { return timestamp_; }
std::optional<std::string> GetFileRealPath();
std::optional<std::string> GetFileRealPath(const std::string &suffix = "");

void SetDirectory(const std::string &directory);
void SetFilename(const std::string &filename);


+ 21
- 0
mindspore/ccsrc/debug/rdr/running_data_recorder.cc View File

@@ -19,6 +19,7 @@
#include "debug/rdr/graph_exec_order_recorder.h"
#include "debug/rdr/recorder_manager.h"
#include "debug/rdr/string_recorder.h"
#include "debug/rdr/stream_exec_order_recorder.h"
#include "mindspore/core/ir/func_graph.h"
#include "mindspore/core/ir/anf.h"

@@ -84,6 +85,15 @@ bool RecordString(SubModuleId module, const std::string &tag, const std::string
return ans;
}

bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id,
const std::vector<CNodePtr> &exec_order) {
std::string submodule_name = std::string(GetSubModuleName(module));
StreamExecOrderRecorderPtr stream_exec_order_recorder =
std::make_shared<StreamExecOrderRecorder>(submodule_name, tag, graph_id, exec_order);
bool ans = mindspore::RecorderManager::Instance().RecordObject(std::move(stream_exec_order_recorder));
return ans;
}

// Forwards to RecorderManager::TriggerAll on the process-wide manager instance.
void TriggerAll() {
  auto &manager = mindspore::RecorderManager::Instance();
  manager.TriggerAll();
}

#else
@@ -119,6 +129,17 @@ bool RecordString(SubModuleId module, const std::string &tag, const std::string
return false;
}

// Fallback used when the recorder implementation is compiled out: nothing is recorded and the
// function always returns false. A warning is logged on the first call only.
bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id,
                           const std::vector<CNodePtr> &exec_order) {
  static bool warned = false;
  if (!warned) {
    // Flip the flag before logging so later calls stay silent.
    warned = true;
    MS_LOG(WARNING) << "The RDR presently only support linux os.";
  }
  return false;
}

void TriggerAll() {
static bool already_printed = false;
if (already_printed) {


+ 2
- 0
mindspore/ccsrc/debug/rdr/running_data_recorder.h View File

@@ -33,6 +33,8 @@ bool RecordGraphExecOrder(const SubModuleId module, const std::string &tag,
const std::vector<CNodePtr> &&final_exec_order);
bool RecordString(SubModuleId module, const std::string &tag, const std::string &data,
const std::string &filename = "");
bool RecordStreamExecOrder(const SubModuleId module, const std::string &tag, const int &graph_id,
const std::vector<CNodePtr> &exec_order);
void TriggerAll();
} // namespace RDR
} // namespace mindspore


+ 72
- 0
mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.cc View File

@@ -0,0 +1,72 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/rdr/stream_exec_order_recorder.h"
#include <sstream>
#include <fstream>
#include "mindspore/core/ir/anf.h"
#include "mindspore/core/utils/log_adapter.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/utils.h"

namespace mindspore {
// Renders the ids in `v` as a comma-separated list with no spaces and no trailing comma.
// An empty vector yields an empty string.
std::string Vector2String(const std::vector<uint32_t> &v) {
  std::string joined;
  bool is_first = true;
  for (const uint32_t id : v) {
    if (!is_first) {
      joined += ",";
    }
    joined += std::to_string(id);
    is_first = false;
  }
  return joined;
}

// Serialises this node into a JSON object.
// The index, node name, logic id, stream id, node info and event id are always written.
// The label-id and active-stream-id lists are written as comma-separated strings, and only
// when they are non-empty.
json ExecNode::ExecNode2Json() {
  json node_json;

  // Fields that are always present.
  node_json[kAttrIndex] = index_;
  node_json[kAttrNodeName] = node_name_;
  node_json[kAttrLogicId] = logic_id_;
  node_json[kAttrStreamId] = stream_id_;
  node_json[kAttrNodeInfo] = node_info_;
  node_json[kAttrEventId] = event_id_;

  // Optional id lists: the key is omitted entirely when the list is empty.
  const bool has_labels = !label_ids_.empty();
  if (has_labels) {
    node_json[kAttrLabelId] = Vector2String(label_ids_);
  }
  const bool has_active_streams = !active_stream_ids_.empty();
  if (has_active_streams) {
    node_json[kAttrActiveStreamId] = Vector2String(active_stream_ids_);
  }

  return node_json;
}

void StreamExecOrderRecorder::Export() {
std::string file_suffix = std::to_string(graph_id_);
auto realpath = GetFileRealPath(file_suffix);
if (!realpath.has_value()) {
return;
}
std::string real_file_path = realpath.value() + ".json";
json exec_order_json = json::array();
for (size_t i = 0; i < exec_order_.size(); ++i) {
exec_order_json.push_back(exec_order_[i]->ExecNode2Json());
}
ChangeFileMode(real_file_path, S_IRWXU);
std::ofstream fout(real_file_path, std::ofstream::app);
if (!fout.is_open()) {
MS_LOG(WARNING) << "Open file for saving stream execute order failed. File path: '" << real_file_path << "'.";
return;
}
fout << exec_order_json.dump(2);
fout.close();
ChangeFileMode(real_file_path, S_IRUSR);
}
} // namespace mindspore

+ 110
- 0
mindspore/ccsrc/debug/rdr/stream_exec_order_recorder.h View File

@@ -0,0 +1,110 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_DEBUG_RDR_STREAM_EXEC_ORDER_RECORDER_H_
#define MINDSPORE_CCSRC_DEBUG_RDR_STREAM_EXEC_ORDER_RECORDER_H_
#include <vector>
#include <string>
#include <memory>
#include <utility>
#include "nlohmann/json.hpp"
#include "backend/session/anf_runtime_algorithm.h"
#include "debug/rdr/base_recorder.h"

using json = nlohmann::json;

// JSON keys used by ExecNode::ExecNode2Json when dumping a node of the stream execution order.
// Key for the node's name.
constexpr auto kAttrNodeName = "node_name";
// Key for the node's logic id.
constexpr auto kAttrLogicId = "logic_id";
// Key for the node's info string.
constexpr auto kAttrNodeInfo = "node_info";
// Key for the comma-separated label ids; only written when the node has at least one label id.
constexpr auto kAttrLabelId = "label_id";
// Key for the comma-separated active stream ids; only written when the node has at least one.
constexpr auto kAttrActiveStreamId = "active_stream_id";

namespace mindspore {
// Value record for one node of a graph's stream execution order.
// It stores the node's position, names and stream-related ids, and can serialise itself to JSON
// through ExecNode2Json().
//
// All scalar members carry default initializers. ExecNode2Json() always writes the event id, so
// a node for which SetEventId() was never called must still hold a well-defined value (0) rather
// than an indeterminate one.
class ExecNode {
 public:
  ExecNode() = default;
  // index:     position of the node in the execution order.
  // node_name: name of the node.
  // logic_id:  logic id of the node.
  // stream_id: id of the stream the node is assigned to.
  // node_info: free-form description of the node.
  ExecNode(const size_t index, const std::string &node_name, const uint32_t &logic_id, const uint32_t &stream_id,
           const std::string &node_info)
      : index_(index), node_name_(node_name), logic_id_(logic_id), stream_id_(stream_id), node_info_(node_info) {}
  // Overwrites the event id.
  void SetEventId(const uint32_t &event_id) { event_id_ = event_id; }
  // Appends one label id; may be called repeatedly to accumulate several.
  void SetLabelId(const uint32_t &label_id) { label_ids_.push_back(label_id); }
  // Appends one active stream id; may be called repeatedly to accumulate several.
  void SetActiveStreamId(const uint32_t &active_stream_id) { active_stream_ids_.push_back(active_stream_id); }
  // Returns the JSON object describing this node.
  json ExecNode2Json();

 private:
  size_t index_{0};
  std::string node_name_;
  uint32_t logic_id_{0};
  uint32_t stream_id_{0};
  std::string node_info_;
  uint32_t event_id_{0};  // stays 0 unless SetEventId() is called
  std::vector<uint32_t> label_ids_;
  std::vector<uint32_t> active_stream_ids_;
};
using ExecNodePtr = std::shared_ptr<ExecNode>;

class CNode;
using CNodePtr = std::shared_ptr<CNode>;
class StreamExecOrderRecorder : public BaseRecorder {
public:
StreamExecOrderRecorder() : BaseRecorder() {}
StreamExecOrderRecorder(const std::string &module, const std::string &tag, const int &graph_id,
const std::vector<CNodePtr> &exec_order)
: BaseRecorder(module, tag), graph_id_(graph_id) {
// Extract information from execute order.
for (size_t i = 0; i < exec_order.size(); i++) {
CNodePtr cur_cnode_ptr = exec_order[i];
MS_EXCEPTION_IF_NULL(cur_cnode_ptr);

ExecNode exec_node =
ExecNode(i, cur_cnode_ptr->fullname_with_scope(), AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()),
AnfAlgo::GetStreamId(cur_cnode_ptr), cur_cnode_ptr->DebugString());

if (AnfAlgo::HasNodeAttr(kAttrEventId, cur_cnode_ptr)) {
exec_node.SetEventId(AnfAlgo::GetNodeAttr<uint32_t>(cur_cnode_ptr, kAttrEventId));
}

if (AnfAlgo::HasNodeAttr(kAttrLabelIndex, cur_cnode_ptr)) {
exec_node.SetLabelId(AnfAlgo::GetNodeAttr<uint32_t>(cur_cnode_ptr, kAttrLabelIndex));
}

if (AnfAlgo::HasNodeAttr(kAttrLabelSwitchList, cur_cnode_ptr)) {
auto label_list = AnfAlgo::GetNodeAttr<std::vector<uint32_t>>(cur_cnode_ptr, kAttrLabelSwitchList);
for (size_t j = 0; j < label_list.size(); ++j) {
exec_node.SetLabelId(label_list[j]);
}
}

std::string active_stream_str;
if (AnfAlgo::HasNodeAttr(kAttrActiveStreamList, cur_cnode_ptr)) {
auto stream_list = AnfAlgo::GetNodeAttr<std::vector<uint32_t>>(cur_cnode_ptr, kAttrActiveStreamList);
for (size_t j = 0; j < stream_list.size(); ++j) {
exec_node.SetActiveStreamId(stream_list[j]);
}
}
ExecNodePtr exec_node_ptr = std::make_shared<ExecNode>(exec_node);
exec_order_.push_back(std::move(exec_node_ptr));
}
}
void SetModule(const std::string &module) { module_ = module; }
virtual void Export();

private:
std::vector<ExecNodePtr> exec_order_;
int graph_id_{0};
};
using StreamExecOrderRecorderPtr = std::shared_ptr<StreamExecOrderRecorder>;
} // namespace mindspore
#endif // MINDSPORE_CCSRC_DEBUG_RDR_STREAM_EXEC_ORDER_RECORDER_H_

+ 1
- 1
mindspore/ccsrc/debug/rdr/string_recorder.cc View File

@@ -30,7 +30,7 @@ void StringRecorder::Export() {
ChangeFileMode(file_path, S_IRWXU);
std::ofstream fout(file_path, std::ofstream::app);
if (!fout.is_open()) {
MS_LOG(WARNING) << "Open file for saving string failed.";
MS_LOG(WARNING) << "Open file for saving string failed. File path: '" << file_path << "'.";
return;
}
fout << data_;


+ 10
- 0
mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc View File

@@ -28,6 +28,10 @@
#include "backend/kernel_compiler/oplib/oplib.h"
#include "utils/utils.h"

#ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h"
#endif

namespace mindspore {
namespace device {
namespace ascend {
@@ -108,6 +112,12 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr)
GetNeedActiveStreams(graph_ptr);
CheckResourceAssign(graph_ptr);
MS_LOG(INFO) << "After finish stream assign";
#ifdef ENABLE_DUMP_IR
SubModuleId module = SubModuleId::SM_SESSION;
std::string tag = "assign_stream";
const std::vector<CNodePtr> &exec_order = graph_ptr->execution_order();
mindspore::RDR::RecordStreamExecOrder(module, tag, graph_ptr->graph_id(), exec_order);
#endif
graph_ptr->PrintGraphExecuteOrder();

FindStreamRelations(graph_ptr);


Loading…
Cancel
Save