Browse Source

For step profiling.

pull/1308/head
unknown 4 years ago
parent
commit
efb5924795
9 changed files with 163 additions and 1 deletions
  1. +64
    -0
      ge/graph/load/model_manager/davinci_model.cc
  2. +2
    -0
      ge/graph/load/model_manager/davinci_model.h
  3. +64
    -0
      ge/hybrid/executor/hybrid_model_executor.cc
  4. +2
    -0
      ge/hybrid/executor/hybrid_model_executor.h
  5. +2
    -0
      tests/depends/runtime/src/runtime_stub.cc
  6. +1
    -0
      tests/ut/ge/CMakeLists.txt
  7. +6
    -0
      tests/ut/ge/graph/load/davinci_model_unittest.cc
  8. +16
    -1
      tests/ut/ge/hybrid/ge_hybrid_unittest.cc
  9. +6
    -0
      third_party/fwkacllib/inc/runtime/base.h

+ 64
- 0
ge/graph/load/model_manager/davinci_model.cc View File

@@ -111,6 +111,9 @@ const char *const kWorkSpaceSize = "workspace_size";
const char *const kTotalSize = "total_size"; const char *const kTotalSize = "total_size";
const char *const kTaskCount = "task_count"; const char *const kTaskCount = "task_count";
const char *const kTaskId = "task_id"; const char *const kTaskId = "task_id";
const char *const kIndexId = "index_id";  // step-info JSON key: step index
const char *const kTimeStamp = "time_stamp";  // step-info JSON key: tick-count time in nanoseconds
const char *const kTagId = "tag_id";  // step-info JSON key: step tag (0 = step begin, 1 = step end)
const char* const kRequestId = "request_id"; const char* const kRequestId = "request_id";
const char* const kThreadId = "thread_id"; const char* const kThreadId = "thread_id";
const char* const kInputBeginTime = "input_begin_time"; const char* const kInputBeginTime = "input_begin_time";
@@ -2305,6 +2308,59 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
return SUCCESS; return SUCCESS;
} }


///
/// @ingroup ge
/// @brief Report one step-profiling record (step begin / step end) for a model.
///        Does nothing when model-execute profiling is off. Otherwise it calls
///        rtProfilerTraceEx on the model's rt_model_stream_, then builds a JSON
///        record (index id, model id, time stamp, tag id, task id, stream id,
///        thread id) and passes it to the profiling manager under "step_info".
/// @param [in] model   model whose iteration counter, id, stream and device id are read
/// @param [in] tag_id  0 means step begin, 1 means step end
/// @return SUCCESS when profiling is off or the step was handled;
///         FAILED when profiling is on and model is nullptr;
///         the converted runtime status when a runtime call fails
///
Status DavinciModel::ProfileStepInfo(DavinciModel *model, uint16_t tag_id) {
  auto &prof_mgr = ProfilingManager::Instance();
  if (!prof_mgr.ProfilingModelExecuteOn()) {
    return SUCCESS;
  }
  // Checked only on the profiling path so the profiling-off behaviour is unchanged.
  if (model == nullptr) {
    GELOGE(FAILED, "[Check][Param] model is nullptr, step info can not be profiled.");
    return FAILED;
  }

  const uint64_t index_id = model->iterator_count_ + 1;
  const uint64_t model_id = static_cast<uint64_t>(model->Id());
  rtStream_t stream = model->rt_model_stream_;
  GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u",
         index_id, model_id, tag_id);
  rtError_t rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u",
         index_id, model_id, tag_id);

  const mmTimespec timespec = mmGetTickCount();
  // 1000 ^ 3 converts second to nanosecond; widen first so the product is computed in 64 bits.
  const int64_t time = static_cast<int64_t>(timespec.tv_sec) * 1000 * 1000 * 1000 + timespec.tv_nsec;
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id);
  const uint32_t device_id = model->GetDeviceId();

  Json step_info;
  step_info[kIndexId] = index_id;
  step_info[kModeleId] = model_id;
  step_info[kTimeStamp] = time;
  step_info[kTagId] = tag_id;
  step_info[kTaskId] = task_id;
  step_info[kStreamId] = stream_id;
  step_info[kThreadId] = mmGetTid();

  std::string reported_data;
  try {
    reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
  } catch (const std::exception &e) {
    // The record could not be built: log it and drop it instead of reporting a bare ",\n".
    GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
    return SUCCESS;
  } catch (...) {
    GELOGE(FAILED, "Failed to convert JSON to string.");
    return SUCCESS;
  }
  reported_data.append(",\n");
  prof_mgr.ReportData(device_id, reported_data, "step_info");
  return SUCCESS;
}

void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) { void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) {
int64_t time = endTime; int64_t time = endTime;


@@ -2599,6 +2655,8 @@ void *DavinciModel::Run(DavinciModel *model) {
GELOGI("data_wrapper is null!"); GELOGI("data_wrapper is null!");
continue; continue;
} }
// tag_id 0 means step begin, 1 means step end.
(void)DavinciModel::ProfileStepInfo(model, 0);
GELOGI("Getting the input data, model_id:%u", model_id); GELOGI("Getting the input data, model_id:%u", model_id);
GE_IF_BOOL_EXEC(!model->RunFlag(), break); GE_IF_BOOL_EXEC(!model->RunFlag(), break);


@@ -2678,6 +2736,8 @@ void *DavinciModel::Run(DavinciModel *model) {
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(),
model->SetProfileTime(MODEL_AFTER_PROC_END)); model->SetProfileTime(MODEL_AFTER_PROC_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data));
// tag_id 0 means step begin, 1 means step end.
(void)DavinciModel::ProfileStepInfo(model, 1);


model->iterator_count_++; model->iterator_count_++;
model->is_first_execute_ = false; model->is_first_execute_ = false;
@@ -3691,6 +3751,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
GELOGD("current_data.index=%u", input_data.index); GELOGD("current_data.index=%u", input_data.index);
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END));


// tag_id 0 means step begin, 1 means step end.
GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 0));
if (!task_list_.empty()) { if (!task_list_.empty()) {
GELOGD("rtModelExecute do"); GELOGD("rtModelExecute do");
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START));
@@ -3698,7 +3760,9 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END));
GELOGD("rtModelExecute end"); GELOGD("rtModelExecute end");
iterator_count_++;
} }
GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 1));


if (!is_async_mode_) { if (!is_async_mode_) {
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START));


+ 2
- 0
ge/graph/load/model_manager/davinci_model.h View File

@@ -878,6 +878,8 @@ class DavinciModel {
Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node); Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node);
Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node); Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node);


// Reports step-profiling info for `model` when model-execute profiling is on (no-op otherwise).
// tag_id 0 means step begin, 1 means step end.
static Status ProfileStepInfo(DavinciModel *model, uint16_t tag_id);

bool is_weight_mem_has_inited_; bool is_weight_mem_has_inited_;
bool is_feature_map_mem_has_inited_; bool is_feature_map_mem_has_inited_;




+ 64
- 0
ge/hybrid/executor/hybrid_model_executor.cc View File

@@ -18,12 +18,22 @@
#include "graph/ge_context.h" #include "graph/ge_context.h"
#include "graph/runtime_inference_context.h" #include "graph/runtime_inference_context.h"
#include "common/dump/dump_manager.h" #include "common/dump/dump_manager.h"
#include "common/profiling/profiling_manager.h"
#include "mmpa/mmpa_api.h"


namespace ge { namespace ge {
namespace hybrid { namespace hybrid {
namespace { namespace {
const int kIntBase = 10; const int kIntBase = 10;
const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL";
// Keys of the JSON record built by HybridModelExecutor::ProfileStepInfo.
const char *const kIndexId = "index_id";
const char *const kModeleId = "model_id";
const char *const kTimeStamp = "time_stamp";
const char *const kStreamId = "stream_id";
const char *const kTaskId = "task_id";
const char *const kTagId = "tag_id";
const char *const kThreadId = "thread_id";
// Indent width passed to Json::dump when the step-info record is serialized.
const uint32_t kInteval = 2;
} // namespace } // namespace
HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream)
: model_(model), device_id_(device_id), stream_(stream) { : model_(model), device_id_(device_id), stream_(stream) {
@@ -71,15 +81,69 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
return SUCCESS; return SUCCESS;
} }


///
/// @brief Report one step-profiling record (step begin / step end) for the hybrid model.
///        Does nothing when model-execute profiling is off. Otherwise it calls
///        rtProfilerTraceEx on stream_, then builds a JSON record (index id,
///        model id, time stamp, tag id, task id, stream id, thread id) and
///        passes it to the profiling manager under "step_info" for device_id_.
/// @param [in] tag_id  0 means step begin, 1 means step end
/// @return SUCCESS when profiling is off or the step was handled;
///         FAILED when profiling is on and model_ is nullptr;
///         the converted runtime status when a runtime call fails
///
Status HybridModelExecutor::ProfileStepInfo(uint16_t tag_id) {
  auto &prof_mgr = ProfilingManager::Instance();
  if (!prof_mgr.ProfilingModelExecuteOn()) {
    return SUCCESS;
  }
  // Checked only on the profiling path so the profiling-off behaviour is unchanged.
  if (model_ == nullptr) {
    GELOGE(FAILED, "[Check][Param] model_ is nullptr, step info can not be profiled.");
    return FAILED;
  }

  const uint64_t index_id = context_.iteration + 1;
  const uint64_t model_id = static_cast<uint64_t>(model_->GetModelId());
  GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u",
         index_id, model_id, tag_id);
  rtError_t rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx] failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  // model_id is 64-bit, so it is printed with %lu like index_id.
  GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id = %lu, tag_id = %u",
         index_id, model_id, tag_id);

  const mmTimespec timespec = mmGetTickCount();
  // 1000 ^ 3 converts second to nanosecond; widen first so the product is computed in 64 bits.
  const int64_t time = static_cast<int64_t>(timespec.tv_sec) * 1000 * 1000 * 1000 + timespec.tv_nsec;
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "[Get][RtsInfo] task_id and stream_id failed, ret: 0x%X.", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("Get profiling args, task_id[%u], stream_id[%u]", task_id, stream_id);

  Json step_info;
  step_info[kIndexId] = index_id;
  step_info[kModeleId] = model_id;
  step_info[kTimeStamp] = time;
  step_info[kTagId] = tag_id;
  step_info[kTaskId] = task_id;
  step_info[kStreamId] = stream_id;
  step_info[kThreadId] = mmGetTid();

  std::string reported_data;
  try {
    reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
  } catch (const std::exception &e) {
    // The record could not be built: log it and drop it instead of reporting a bare ",\n".
    GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
    return SUCCESS;
  } catch (...) {
    GELOGE(FAILED, "Failed to convert JSON to string.");
    return SUCCESS;
  }
  reported_data.append(",\n");
  prof_mgr.ReportData(device_id_, reported_data, "step_info");
  return SUCCESS;
}

Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
HybridModelExecutor::ExecuteArgs &args) { HybridModelExecutor::ExecuteArgs &args) {
RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start"); RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start");
GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_));
RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End");


// tag_id 0 means step begin, 1 means step end.
GE_CHK_STATUS_RET_NOLOG(ProfileStepBegin(0));
HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs),
"Failed to execute partitioned call."); "Failed to execute partitioned call.");
RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End");
GE_CHK_STATUS_RET_NOLOG(ProfileStepEnd(1));


HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");


+ 2
- 0
ge/hybrid/executor/hybrid_model_executor.h View File

@@ -51,6 +51,8 @@ class HybridModelExecutor {
Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args); Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args);
Status Cleanup(); Status Cleanup();
Status InitExecutionContext(); Status InitExecutionContext();
// Reports step-profiling info when model-execute profiling is on (no-op otherwise).
// tag_id 0 means step begin, 1 means step end.
Status ProfileStepInfo(uint16_t tag_id);

static Status ResetExecutionContext(GraphExecutionContext &context); static Status ResetExecutionContext(GraphExecutionContext &context);


HybridModel *model_; HybridModel *model_;


+ 2
- 0
tests/depends/runtime/src/runtime_stub.cc View File

@@ -313,6 +313,8 @@ rtError_t rtFlushCache(uint64_t base, uint32_t len) { return RT_ERROR_NONE; }


rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream_) { return RT_ERROR_NONE; } rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream_) { return RT_ERROR_NONE; }


// Test stub for the runtime keypoint trace call: ignores every argument and reports success.
rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream_) {
  return RT_ERROR_NONE;
}

rtError_t rtMemSetRC(const void *dev_ptr, uint64_t size, uint32_t read_count) { return RT_ERROR_NONE; } rtError_t rtMemSetRC(const void *dev_ptr, uint64_t size, uint32_t read_count) { return RT_ERROR_NONE; }


rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t true_stream, rtStream_t stream) { rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t true_stream, rtStream_t stream) {


+ 1
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -845,6 +845,7 @@ target_link_libraries(ge_ut_common PRIVATE
ascend_protobuf ascend_protobuf
json json
ge_ut_graph ge_ut_graph
runtime_stub
) )


# build common format # build common format


+ 6
- 0
tests/ut/ge/graph/load/davinci_model_unittest.cc View File

@@ -898,6 +898,12 @@ TEST_F(UtestDavinciModel, Sink_time_profile) {
model.SinkTimeProfile(current_data); model.SinkTimeProfile(current_data);
} }


// ProfileStepInfo with tag 0 on a model constructed with (0, nullptr) is expected to return SUCCESS.
TEST_F(UtestDavinciModel, Step_info_profile) {
  auto &profiling_manager = ProfilingManager::Instance();
  profiling_manager.prof_cb_.msprofReporterCallback = MsprofReport;

  DavinciModel davinci_model(0, nullptr);
  const auto status = DavinciModel::ProfileStepInfo(&davinci_model, 0);
  EXPECT_EQ(status, SUCCESS);
}

class ClassTest { class ClassTest {
public: public:
virtual ~ClassTest() {} virtual ~ClassTest() {}


+ 16
- 1
tests/ut/ge/hybrid/ge_hybrid_unittest.cc View File

@@ -37,6 +37,7 @@
#include "hybrid/common/npu_memory_allocator.h" #include "hybrid/common/npu_memory_allocator.h"
#include "graph/types.h" #include "graph/types.h"
#include "graph/utils/tensor_utils.h" #include "graph/utils/tensor_utils.h"
#include "common/profiling/profiling_manager.h"


#undef private #undef private
#undef protected #undef protected
@@ -46,6 +47,11 @@ using namespace testing;
using namespace ge; using namespace ge;
using namespace hybrid; using namespace hybrid;


namespace {
// Stand-in for the msprof reporter callback used by the tests: the arguments are
// deliberately ignored and 0 is always returned.
int32_t MsprofReport(uint32_t moduleId, uint32_t type, void *data, uint32_t len) {
  static_cast<void>(moduleId);
  static_cast<void>(type);
  static_cast<void>(data);
  static_cast<void>(len);
  constexpr int32_t kReportResult = 0;
  return kReportResult;
}
}  // namespace


class UtestGeHybrid : public testing::Test { class UtestGeHybrid : public testing::Test {
protected: protected:
@@ -245,7 +251,7 @@ TEST_F(UtestGeHybrid, init_weight_success) {
ASSERT_EQ(ret,PARAM_INVALID); ASSERT_EQ(ret,PARAM_INVALID);
} }


TEST_F(UtestGeHybrid, hybrid_model_executor) {
TEST_F(UtestGeHybrid, hybrid_model_executor) {
ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("abc"); ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("abc");
GeRootModelPtr root_model = MakeShared<ge::GeRootModel>(compute_graph); GeRootModelPtr root_model = MakeShared<ge::GeRootModel>(compute_graph);
HybridModel model(root_model); HybridModel model(root_model);
@@ -256,3 +262,12 @@ TEST_F(UtestGeHybrid, init_weight_success) {
HybridModelExecutor executor(model_ptr, device_id, stream); HybridModelExecutor executor(model_ptr, device_id, stream);
executor.Init(); executor.Init();
} }

// ProfileStepInfo with tag 0 on an executor built for device 0 with a null stream
// is expected to return SUCCESS.
TEST_F(UtestGeHybrid, Step_info_profile) {
  ProfilingManager::Instance().prof_cb_.msprofReporterCallback = MsprofReport;
  auto graph = make_shared<ComputeGraph>("graph");
  auto root_model = make_shared<GeRootModel>(graph);
  HybridModel model(root_model);
  HybridModelExecutor executor(&model, 0, nullptr);
  // The statement must end with ';' (it previously ended with ':', which does not compile).
  ASSERT_EQ(executor.ProfileStepInfo(0), SUCCESS);
}

+ 6
- 0
third_party/fwkacllib/inc/runtime/base.h View File

@@ -154,6 +154,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t
*/ */
RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream);


/**
 * @ingroup profiling_base
 * @brief ts send keypoint profiler log.
 * @param [in] id       trace id; presumably the step index (GE callers pass iteration count + 1) - TODO confirm
 * @param [in] modelId  id of the model the trace belongs to
 * @param [in] tagId    trace tag; GE callers pass 0 for step begin and 1 for step end
 * @param [in] stream_  stream handle the call is made with
 * @return RT_ERROR_NONE for ok; callers treat any other value as failure
 */
RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream_);

/** /**
* @ingroup profiling_base * @ingroup profiling_base
* @brief ts set profiling reporter callback. * @brief ts set profiling reporter callback.


Loading…
Cancel
Save