Browse Source

Optimize Task sink memory usage.

pull/933/head
zhangxiaokun 5 years ago
parent
commit
b14112d79b
3 changed files with 24 additions and 2 deletions
  1. +8
    -1
      ge/graph/load/new_model_manager/davinci_model.cc
  2. +10
    -0
      ge/graph/load/new_model_manager/davinci_model.h
  3. +6
    -1
      ge/graph/load/new_model_manager/task_info/kernel_task_info.cc

+ 8
- 1
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -3021,7 +3021,14 @@ Status DavinciModel::MallocKnownArgs() {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

// malloc dynamic and static hybrid memory
if (total_hybrid_args_size_ != 0) {
rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}
// malloc fixed addr memory, eg: rts op
if (total_fixed_addr_size_ != 0) {
GELOGI("Begin to allocate fixed addr.");


+ 10
- 0
ge/graph/load/new_model_manager/davinci_model.h View File

@@ -498,6 +498,14 @@ class DavinciModel {
return cur_args;
}
void SetTotalIOAddrs(const vector<void *> &io_addrs);
// Adds args_size to the running hybrid args total. Despite the "Set" prefix,
// the value is accumulated across calls, not overwritten.
void SetHybridArgsSize(uint32_t args_size) {
  total_hybrid_args_size_ = total_hybrid_args_size_ + args_size;
}
uint32_t GetHybridArgsSize() {
return total_hybrid_args_size_;
}
void *GetCurrentHybridArgsAddr(uint32_t offset) {
void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
return cur_args;
}
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
int64_t GetFixedAddrsSize(string tensor_name);
void *GetCurrentFixedAddr(int64_t offset) const {
@@ -975,6 +983,8 @@ class DavinciModel {
void *args_ = nullptr;
void *args_host_ = nullptr;
void *fixed_addrs_ = nullptr;
// Base address of the dynamic/static hybrid args memory; allocated with
// rtMalloc in MallocKnownArgs when total_hybrid_args_size_ is non-zero.
void *hybrid_addrs_ = nullptr;
// Running total accumulated by SetHybridArgsSize; used as the allocation
// size for hybrid_addrs_.
uint32_t total_hybrid_args_size_ = 0;
int64_t total_fixed_addr_size_ = 0;
map<const void *, void *> known_input_data_info_;
map<const void *, void *> known_output_data_info_;


+ 6
- 1
ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -444,7 +444,6 @@ void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) {
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc);
io_addrs_.insert(io_addrs_.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
}
return SUCCESS;
}

Status KernelTaskInfo::UpdateArgs() {
@@ -897,6 +896,12 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

if (davinci_model_->IsKnownNode()) {
args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
InitDumpTask(sizeof(aicpu::AicpuParamHead));
return SUCCESS;
}
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);


Loading…
Cancel
Save