Browse Source

For aicpu all_shape compile.

pull/929/head
unknown 5 years ago
parent
commit
b42a7efe8c
9 changed files with 87 additions and 24 deletions
  1. +4
    -2
      ge/generator/ge_generator.cc
  2. +43
    -10
      ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
  3. +1
    -0
      ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
  4. +7
    -0
      ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  5. +11
    -11
      ge/graph/manager/graph_manager.cc
  6. +14
    -1
      ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
  7. +3
    -0
      ge/hybrid/node_executor/aicpu/aicpu_ext_info.h
  8. +2
    -0
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  9. +2
    -0
      ge/single_op/task/op_task.cc

+ 4
- 2
ge/generator/ge_generator.cc View File

@@ -721,8 +721,10 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
GeModelPtr &ge_model = name_to_ge_model.begin()->second;
GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());

bool dynamic_flag = false;
if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) {
bool all_shape = false;
(void)AttrUtils::GetBool(op_desc, "_AllShape", all_shape);
if (all_shape) {
GELOGD("Get aicpu all_shape kernel!");
vector<GeTensor> inputs_dynamic;
vector<GeTensor> outputs_dynamic;
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(inputs, inputs_dynamic));


+ 43
- 10
ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc View File

@@ -28,6 +28,47 @@
#include "graph/load/new_model_manager/model_manager.h"

namespace ge {
/// Prepares the kernel ext_info blob and uploads it to the device.
///
/// The blob is copied into a private host buffer so it can be patched without
/// touching the caller's string. The buffer is then walked entry by entry
/// (an aicpu::FWKAdapter::ExtInfo header followed by infoLen payload bytes);
/// for every FWK_ADPT_EXT_BITMAP entry bit 0 of its 64-bit payload is set.
/// Finally the patched buffer is copied into memory obtained from rtMalloc and
/// the resulting address is stored in ext_info_addr_.
///
/// @param ext_info  Serialized ext_info bytes; an empty string is a no-op.
/// @return SUCCESS on success (or empty input), FAILED if the host copy fails,
///         PARAM_INVALID if a bitmap entry is malformed, or the converted
///         runtime error if rtMalloc / rtMemcpy fails.
Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info) {
  if (ext_info.empty()) {
    return SUCCESS;
  }

  // Work on a private copy: ext_info is const and must not be modified in place.
  std::unique_ptr<uint8_t[]> copy_ext_info;
  copy_ext_info.reset(new (std::nothrow) uint8_t[ext_info.size()]);
  GE_CHECK_NOTNULL(copy_ext_info);
  auto sec_ret = memcpy_s(copy_ext_info.get(), ext_info.size(), ext_info.c_str(), ext_info.size());
  if (sec_ret != EOK) {
    GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
    return FAILED;
  }

  auto ext_info_data = copy_ext_info.get();
  size_t offset = 0;
  // The loop condition guarantees a complete header is readable at `offset`.
  while (offset + sizeof(aicpu::FWKAdapter::ExtInfo) <= ext_info.size()) {
    auto aicpu_ext_info = reinterpret_cast<aicpu::FWKAdapter::ExtInfo *>(ext_info_data + offset);
    GELOGD("Ext infoType=%d, infoLen=%u.", aicpu_ext_info->infoType, aicpu_ext_info->infoLen);
    if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP) {
      GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(uint64_t), PARAM_INVALID,
                             "Node[%s] parse bit_map info failed as infoLen must be %zu but %u.",
                             node_name_.c_str(), sizeof(uint64_t), aicpu_ext_info->infoLen);
      // The header fits (loop condition), but the payload must fit as well before it is written;
      // otherwise a truncated trailing entry would be patched past the end of the buffer.
      const size_t payload_room = ext_info.size() - offset - sizeof(aicpu::FWKAdapter::ExtInfo);
      GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen <= payload_room, PARAM_INVALID,
                             "Node[%s] parse bit_map info failed as infoLen %u exceeds remaining size %zu.",
                             node_name_.c_str(), aicpu_ext_info->infoLen, payload_room);
      // Must be a pointer into the copied buffer so the update lands in the data sent to the device.
      uint64_t *bit_map = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg);
      *(bit_map) |= 1;
      GELOGD("Update aicpu_task ext_info bit_map to 1.");
    }
    offset += sizeof(aicpu::FWKAdapter::ExtInfo);
    offset += aicpu_ext_info->infoLen;
  }

  // Upload the patched copy (not the original string) to the device.
  auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
                  GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
                  return RT_ERROR_TO_GE_STATUS(rt_ret);)
  rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info_data, ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
                  GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
                  return RT_ERROR_TO_GE_STATUS(rt_ret);)
  return SUCCESS;
}

Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("KernelExTaskInfo Init Start.");
GE_CHECK_NOTNULL(davinci_model);
@@ -63,16 +104,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
}

const auto &ext_info = kernel_ex_def.kernel_ext_info();
if (!ext_info.empty()) {
auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return RT_ERROR_TO_GE_STATUS(rt_ret);)
rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return RT_ERROR_TO_GE_STATUS(rt_ret);)
}
GE_CHK_STATUS_RET(InitTaskExtInfo(ext_info),
"Init aicpu tf_task ext info failed, ext_info size=%zu", ext_info.size());

GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc->GetName().c_str(),
op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_);


+ 1
- 0
ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h View File

@@ -62,6 +62,7 @@ class KernelExTaskInfo : public TaskInfo {
void SetIoAddrs(const OpDescPtr &op_desc);

void InitDumpTask(void *addr, const OpDescPtr &op_desc);
Status InitTaskExtInfo(const std::string &ext_info);

uint32_t task_id_;
uint32_t stream_id_;


+ 7
- 0
ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -986,6 +986,13 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
session_info->sessionId = davinci_model_->GetSessionId();
session_info->sessFlag = true;
GELOGD("Update aicpu_task ext_info session_info session_id is %lu", session_info->sessionId);
} else if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP) {
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(uint64_t), PARAM_INVALID,
"Node[%s] parse bit_map info failed as infoLen must be %zu but %u.",
node_name_.c_str(), sizeof(uint64_t), aicpu_ext_info->infoLen);
uint64_t bit_map = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg);
*(bit_map) |= 1;
GELOGD("Update aicpu_task ext_info bit_map to 1.");
}
offset += sizeof(aicpu::FWKAdapter::ExtInfo);
offset += aicpu_ext_info->infoLen;


+ 11
- 11
ge/graph/manager/graph_manager.cc View File

@@ -641,22 +641,22 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_

Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) {
GE_CHECK_NOTNULL(compute_graph);
PassManager pass_for_dynamic_shape_reset_optimize;
GE_CHK_STATUS_RET(pass_for_dynamic_shape_reset_optimize.AddPass(
"SetSubgraph::AfterSetSubgraph::DynamicSingleOpResetShapePass", new (std::nothrow) DynamicSingleOpResetShapePass))
GE_TIMESTAMP_START(pass_for_dynamic_shape_reset_optimize);
Status ret = pass_for_dynamic_shape_reset_optimize.Run(compute_graph);
GE_TIMESTAMP_END(pass_for_dynamic_shape_reset_optimize, "SetSubgraph::AfterSetSubgraph");
if (ret != SUCCESS && ret != NOT_CHANGED) {
GELOGE(ret, "Run passes when optimize subgraph failed");
return ret;
}
// PassManager pass_for_dynamic_shape_reset_optimize;
// GE_CHK_STATUS_RET(pass_for_dynamic_shape_reset_optimize.AddPass(
// "SetSubgraph::AfterSetSubgraph::DynamicSingleOpResetShapePass", new (std::nothrow) DynamicSingleOpResetShapePass))
// GE_TIMESTAMP_START(pass_for_dynamic_shape_reset_optimize);
// Status ret = pass_for_dynamic_shape_reset_optimize.Run(compute_graph);
// GE_TIMESTAMP_END(pass_for_dynamic_shape_reset_optimize, "SetSubgraph::AfterSetSubgraph");
// if (ret != SUCCESS && ret != NOT_CHANGED) {
// GELOGE(ret, "Run passes when optimize subgraph failed");
// return ret;
// }

auto sub_graph_map = partitioner.GetSubGraphMap();
GELOGD("Directly optimize subgraph with build mode:%s, and step:%s.",
options_.build_mode.c_str(),
options_.build_step.c_str());
ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id);
Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id);
if (ret != SUCCESS) {
GELOGE(ret, "Multiply optimize subgraph failed");
return ret;


+ 14
- 1
ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc View File

@@ -64,7 +64,7 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed.");
break;
case aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP:
GE_CHK_STATUS_RET(ParseExtBitMap(aicpu_ext_info), "Parse ext session info failed.");
GE_CHK_STATUS_RET(ParseExtBitMap(aicpu_ext_info), "Parse ext bit map failed.");
break;
default:
GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.",
@@ -152,6 +152,19 @@ Status AicpuExtInfoHandler::ParseExtBitMap(AicpuExtInfo *aicpu_ext_info) {
return SUCCESS;
}

/// Sets (flag == true) or clears (flag == false) the lowest bit of the value
/// bit_map_ points to. When bit_map_ is null there is nothing to patch and the
/// call succeeds without doing anything.
///
/// @param flag  Desired state of bit 0.
/// @return Always SUCCESS.
Status AicpuExtInfoHandler::UpdateBitMap(bool flag) {
  if (bit_map_ != nullptr) {
    // Only bit 0 is touched; every other bit keeps its current value.
    *bit_map_ = flag ? (*bit_map_ | 1) : (*bit_map_ & ~1);
    return SUCCESS;
  }
  GELOGD("There is no bit_map in ext_info, no need update.");
  return SUCCESS;
}

Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag) {
if (session_info_ == nullptr) {
GELOGD("There is no session info in ext_info, no need update.");


+ 3
- 0
ge/hybrid/node_executor/aicpu/aicpu_ext_info.h View File

@@ -57,6 +57,8 @@ class AicpuExtInfoHandler {

Status UpdateSessionInfoSessionId(uint64_t session_id);

Status UpdateBitMap(bool flag);

Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type);

private:
@@ -65,6 +67,7 @@ class AicpuExtInfoHandler {
Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info);
Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info);
Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info);
Status ParseExtBitMap(AicpuExtInfo *aicpu_ext_info);

static Status UpdateShapeAndType(const GeShape &shape,
DataType data_type,


+ 2
- 0
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -61,6 +61,7 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_
GELOGD("To update aicpu_task ext_info session_info session_id to %lu", session_id);
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id),
"UpdateSessionInfoSessionId failed.");
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateBitMap(true), "UpdateBitMap failed.");

// copy task args buf
GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_),
@@ -137,6 +138,7 @@ Status AicpuNodeTaskBase::UpdateExtInfo() {
return SUCCESS;
}

GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateBitMap(false), "UpdateBitMap failed.");
for (auto i = 0; i < node_item_->num_inputs; ++i) {
auto input_desc = node_item_->MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);


+ 2
- 0
ge/single_op/task/op_task.cc View File

@@ -366,6 +366,7 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint

GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false),
"UpdateSessionInfo failed.");
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateBitMap(true), "UpdateBitMap failed.");

GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM));
GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(),
@@ -403,6 +404,7 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
}

GE_CHECK_NOTNULL(aicpu_ext_handle_);
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateBitMap(false), "UpdateBitMap failed.");

size_t non_const_index = 0;
for (size_t input_index = 0; input_index < num_inputs_; input_index++) {


Loading…
Cancel
Save