Browse Source

Pre Merge pull request !1253 from 赵之轩/my_dev

pull/1253/MERGE
赵之轩 Gitee 4 years ago
parent
commit
436279add0
7 changed files with 131 additions and 4 deletions
  1. +1
    -0
      ge/executor/ge_executor.cc
  2. +63
    -0
      ge/graph/load/model_manager/davinci_model.cc
  3. +1
    -0
      ge/graph/load/model_manager/davinci_model.h
  4. +60
    -0
      ge/hybrid/executor/hybrid_model_executor.cc
  5. +2
    -0
      ge/hybrid/executor/hybrid_model_executor.h
  6. +1
    -1
      ge/hybrid/node_executor/task_context.cc
  7. +3
    -3
      ge/hybrid/node_executor/task_context.h

+ 1
- 0
ge/executor/ge_executor.cc View File

@@ -852,6 +852,7 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat
*/
Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
ge::RunModelData &run_output_data, bool async_mode) {

std::vector<GeTensorDesc> input_desc = {};
std::vector<GeTensorDesc> output_desc = {};
return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode);


+ 63
- 0
ge/graph/load/model_manager/davinci_model.cc View File

@@ -119,6 +119,9 @@ const char* const kInferBeginTime = "infer_begin_time";
const char* const kInferEndTime = "infer_end_time";
const char* const kOutputBeginTime = "output_start_time";
const char* const kOutputEndTime = "output_end_time";
const char *const kIndexId = "index_id";
const char *const kTimeStamp = "time_stamp";
const char *const kTagId = "tag_id";
const uint32_t kStringHeadElems = 2;

inline bool IsDataOp(const std::string &node_type) {
@@ -2305,6 +2308,57 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
return SUCCESS;
}

// Report per-step profiling info for one model iteration and emit the
// corresponding profiler trace point.
// @param model   model being executed (non-null); supplies model id, device id,
//                iteration count and the model stream
// @param tag_id  0 marks step begin, 1 marks step end
// @return SUCCESS when profiling is disabled or reporting succeeded;
//         a GE status mapped from the runtime error otherwise
Status DavinciModel::ProfileStepInfo(DavinciModel *model, uint16_t tag_id) {
  auto &prof_mgr = ProfilingManager::Instance();
  if (!prof_mgr.ProfilingModelExecuteOn()) {
    return SUCCESS;
  }
  // Wall-clock timestamp in nanoseconds.
  mmTimespec timespec = mmGetTickCount();
  int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec;
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  // NOTE(review): task_id/stream_id queried here may not be final — they are
  // normally only known after execution; confirm against the runtime API contract.
  rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  uint32_t model_id = model->Id();
  uint32_t device_id = model->GetDeviceId();
  uint64_t index_id = model->iterator_count_;
  rtStream_t stream = model->rt_model_stream_;

  // NOTE(review): only kIndexId/kTimeStamp/kTagId are added to this file in the
  // visible hunk; kModeleId/kTaskId/kStreamId/kInteval must already be declared
  // here — confirm, otherwise this will not compile.
  Json step_info;
  step_info[kIndexId] = index_id;
  step_info[kModeleId] = model_id;
  step_info[kTimeStamp] = time;
  step_info[kTagId] = tag_id;
  step_info[kTaskId] = task_id;
  step_info[kStreamId] = stream_id;

  std::string reported_data;
  try {
    reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
  } catch (std::exception &e) {
    GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
    // Fixed: previously fell through and reported an empty/garbage record.
    return FAILED;
  } catch (...) {
    GELOGE(FAILED, "Failed to convert JSON to string.");
    return FAILED;
  }
  reported_data.append(",").append("\n");
  prof_mgr.ReportData(device_id, reported_data, "step_info");

  GELOGD("Profiling Step Info TraceTask execute async start. index_id = %lu, model_id= %u, tag_id = %u",
         index_id, model_id, tag_id);
  // Fixed: reuse rt_ret — the original re-declared a second rtError_t rt_ret in
  // the same scope, which is a redefinition error.
  rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id= %u, tag_id = %u",
         index_id, model_id, tag_id);
  return SUCCESS;
}

void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) {
int64_t time = endTime;

@@ -2599,6 +2653,8 @@ void *DavinciModel::Run(DavinciModel *model) {
GELOGI("data_wrapper is null!");
continue;
}
// tag_id is 0 means step begin, 1 means step end.
GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(model, 0));
GELOGI("Getting the input data, model_id:%u", model_id);
GE_IF_BOOL_EXEC(!model->RunFlag(), break);

@@ -2678,6 +2734,8 @@ void *DavinciModel::Run(DavinciModel *model) {
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(),
model->SetProfileTime(MODEL_AFTER_PROC_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)model->SinkTimeProfile(current_data));
// tag_id is 0 means step begin, 1 means step end.
GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(model, 1));

model->iterator_count_++;
model->is_first_execute_ = false;
@@ -3691,6 +3749,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
GELOGD("current_data.index=%u", input_data.index);
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END));

// tag_id is 0 means step begin, 1 means step end.
GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 0));
if (!task_list_.empty()) {
GELOGD("rtModelExecute do");
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START));
@@ -3698,7 +3758,10 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END));
GELOGD("rtModelExecute end");
iterator_count_++;
}
// tag_id is 0 means step begin, 1 means step end.
GE_CHK_STATUS_RET_NOLOG(ProfileStepInfo(this, 1));

if (!is_async_mode_) {
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START));


+ 1
- 0
ge/graph/load/model_manager/davinci_model.h View File

@@ -853,6 +853,7 @@ class DavinciModel {
Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);
Status ProfileStepInfo(DavinciModel *model, uint16_t tag_id);

Status InitOutputTensorInfo(const OpDescPtr &op_desc);
Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs);


+ 60
- 0
ge/hybrid/executor/hybrid_model_executor.cc View File

@@ -15,6 +15,8 @@
*/

#include "hybrid_model_executor.h"
#include <utility>

#include "graph/ge_context.h"
#include "graph/runtime_inference_context.h"
#include "common/dump/dump_manager.h"
@@ -24,6 +26,13 @@ namespace hybrid {
namespace {
const int kIntBase = 10;
const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL";
const char *const kIndexId = "index_id";
const char *const kModeleId = "model_id";
const char *const kTimeStamp = "time_stamp";
const char *const kStreamId = "stream_id";
const char *const kTaskId = "task_id";
const char *const kTagId = "tag_id";
const uint32_t kInteval = 2;
} // namespace
HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream)
: model_(model), device_id_(device_id), stream_(stream) {
@@ -53,7 +62,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
SubgraphExecutor executor(model_->GetRootGraphItem(), &context_);
// tag_id is 0 means step begin, 1 means step end.
GE_CHK_STATUS_RET_NOLOG(ProfileStepBegin(0));
auto ret = ExecuteGraphInternal(executor, args);
GE_CHK_STATUS_RET_NOLOG(ProfileStepEnd(1));
Cleanup();
RECORD_MODEL_EXECUTION_EVENT(&context_, "[Cleanup] End");
GELOGD("Model executed successfully.");
@@ -71,6 +83,54 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
return SUCCESS;
}

// Report per-step profiling info (hybrid executor path) and emit the
// corresponding profiler trace point on this executor's stream.
// @param tag_id  0 marks step begin, 1 marks step end
// @return SUCCESS when profiling is disabled or reporting succeeded;
//         a GE status mapped from the runtime error otherwise
Status HybridModelExecutor::ProfileStepInfo(uint16_t tag_id) {
  auto &prof_mgr = ProfilingManager::Instance();
  if (!prof_mgr.ProfilingModelExecuteOn()) {
    // Fixed: the original fell off the end of this non-void function when
    // profiling was off — undefined behavior.
    return SUCCESS;
  }
  // Wall-clock timestamp in nanoseconds.
  mmTimespec timespec = mmGetTickCount();
  int64_t time = timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec;
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  // NOTE(review): task_id/stream_id queried here may not be final — they are
  // normally only known after execution; confirm against the runtime API contract.
  rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  uint64_t index_id = context_.iteration;
  // Fixed: model_ is a HybridModel* (see member declaration) — the original used
  // '.' instead of '->'.
  uint32_t model_id = model_->GetModelId();

  Json step_info;
  step_info[kIndexId] = index_id;
  step_info[kModeleId] = model_id;
  step_info[kTimeStamp] = time;
  step_info[kTagId] = tag_id;
  step_info[kTaskId] = task_id;
  step_info[kStreamId] = stream_id;

  std::string reported_data;
  try {
    reported_data = step_info.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
  } catch (std::exception &e) {
    GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
    // Fixed: previously fell through and reported an empty/garbage record.
    return FAILED;
  } catch (...) {
    GELOGE(FAILED, "Failed to convert JSON to string.");
    return FAILED;
  }
  reported_data.append(",").append("\n");
  prof_mgr.ReportData(device_id_, reported_data, "step_info");

  GELOGD("Profiling Step Info TraceTask execute async start. index_id = %lu, model_id= %u, tag_id = %u",
         index_id, model_id, tag_id);
  // Fixed: reuse rt_ret — the original re-declared a second rtError_t rt_ret in
  // the same scope, which is a redefinition error.
  rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("Profiling Step Info TraceTask execute async success. index_id = %lu, model_id= %u, tag_id = %u",
         index_id, model_id, tag_id);
  return SUCCESS;
}

Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
HybridModelExecutor::ExecuteArgs &args) {
RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start");


+ 2
- 0
ge/hybrid/executor/hybrid_model_executor.h View File

@@ -51,6 +51,8 @@ class HybridModelExecutor {
Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args);
Status Cleanup();
Status InitExecutionContext();
Status ProfileStepInfo(uint16_t tag_id);

static Status ResetExecutionContext(GraphExecutionContext &context);

HybridModel *model_;


+ 1
- 1
ge/hybrid/node_executor/task_context.cc View File

@@ -544,7 +544,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream
tmp_task_desc_info.stream_id = stream_id;
tmp_task_desc_info.shape_type = "dynamic";
tmp_task_desc_info.cur_iter_num = iteration_ + 1;
task_desc_info.emplace_back(tmp_task_desc_info);
task_desc_info_.emplace_back(tmp_task_desc_info);
}

return SUCCESS;


+ 3
- 3
ge/hybrid/node_executor/task_context.h View File

@@ -116,10 +116,10 @@ class TaskContext {
void SetForceInferShape(bool force_infer_shape);
void *handle_ = nullptr;

const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; }
const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info_; }
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
const std::string &task_type, uint32_t block_dim);
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); }
void ClearProfilingTaskDescInfo() { task_desc_info_.clear(); }

private:
TaskContext(GraphExecutionContext *execution_context,
@@ -141,7 +141,7 @@ class TaskContext {
uint64_t iteration_ = 0;
uint32_t task_id_ = 0;
uint32_t stream_id_ = 0;
std::vector<TaskDescInfo> task_desc_info;
std::vector<TaskDescInfo> task_desc_info_;
bool is_over_flow_ = false;
};
} // namespace hybrid


Loading…
Cancel
Save