Browse Source

Pre Merge pull request !825 from 储星/dynamic_infer

pull/825/MERGE
储星 Gitee 5 years ago
parent
commit
a2c2d06daf
15 changed files with 129 additions and 21 deletions
  1. +0
    -9
      ge/CMakeLists.txt
  2. +1
    -1
      ge/hybrid/common/tensor_value.cc
  3. +2
    -1
      ge/hybrid/executor/hybrid_model_executor.cc
  4. +42
    -2
      ge/hybrid/executor/subgraph_executor.cc
  5. +14
    -1
      ge/hybrid/executor/subgraph_executor.h
  6. +6
    -2
      ge/hybrid/model/hybrid_model.cc
  7. +1
    -1
      ge/hybrid/model/hybrid_model.h
  8. +12
    -0
      ge/hybrid/model/hybrid_model_builder.cc
  9. +1
    -0
      ge/hybrid/model/hybrid_model_builder.h
  10. +1
    -1
      ge/hybrid/node_executor/task_context.cc
  11. +19
    -2
      ge/single_op/single_op.cc
  12. +4
    -1
      ge/single_op/single_op.h
  13. +21
    -0
      ge/single_op/single_op_model.cc
  14. +4
    -0
      ge/single_op/stream_resource.cc
  15. +1
    -0
      ge/single_op/stream_resource.h

+ 0
- 9
ge/CMakeLists.txt View File

@@ -583,15 +583,6 @@ set(INFER_SRC_LIST
"graph/load/new_model_manager/task_info/model_exit_task_info.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"single_op/task/op_task.cc"
"single_op/task/build_task_utils.cc"
"single_op/task/tbe_task_builder.cc"
"single_op/task/aicpu_task_builder.cc"
"single_op/task/aicpu_kernel_task_builder.cc"
"single_op/single_op.cc"
"single_op/single_op_model.cc"
"single_op/stream_resource.cc"
"single_op/single_op_manager.cc"
"hybrid/hybrid_davinci_model_stub.cc"
"ir_build/ge_ir_build.cc"
"ir_build/atc_ir_common.cc"


+ 1
- 1
ge/hybrid/common/tensor_value.cc View File

@@ -71,7 +71,7 @@ TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_s
TensorValue::~TensorValue() { Destroy(); }

void TensorValue::Destroy() {
if (buffer_ != nullptr || ref_buffer_ != nullptr) {
if (buffer_ != nullptr) {
GELOGD("Unref tensor: %s", DebugString().c_str());
buffer_.reset();
}


+ 2
- 1
ge/hybrid/executor/hybrid_model_executor.cc View File

@@ -68,7 +68,8 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_));
RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End");

GE_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call.");
GE_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs),
"Failed to execute partitioned call.");
RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End");

GE_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");


+ 42
- 2
ge/hybrid/executor/subgraph_executor.cc View File

@@ -131,10 +131,14 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue>
}

Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
const std::vector<ConstGeTensorDescPtr> &input_desc) {
const std::vector<ConstGeTensorDescPtr> &input_desc,
const std::vector<TensorValue> &outputs) {
GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false");
GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str());

if (!outputs.empty()) {
GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs),
"Failed to enable output zero copy by user provided outputs.");
}
if (!graph_item_->IsDynamic()) {
return ExecuteAsyncForKnownShape(inputs);
}
@@ -144,6 +148,11 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
return SUCCESS;
}

Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                      const std::vector<ConstGeTensorDescPtr> &input_desc) {
  // Delegate to the three-argument overload with no user-provided output
  // tensors, i.e. without enabling output zero-copy.
  const std::vector<TensorValue> no_outputs;
  return ExecuteAsync(inputs, input_desc, no_outputs);
}

Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) {
GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str());
if (graph_item_->GetAllNodes().size() != 1) {
@@ -413,5 +422,36 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) {

return SUCCESS;
}
// Binds user-provided output tensors to the graph's output slots so that
// results are written directly into the caller's buffers (zero-copy).
// Fails with PARAM_INVALID when the number of provided tensors does not
// match the graph's output edge count.
Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs) {
  GELOGD("To enable zero copy, output number = %zu", outputs.size());
  const auto &output_edges = graph_item_->GetOutputEdges();
  // Op -> NetOutput: set the output tensor of each Op that feeds the NetOutput node
  if (outputs.size() != output_edges.size()) {
    GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu",
           output_edges.size(),
           outputs.size());
    return PARAM_INVALID;
  }

  for (size_t i = 0; i < outputs.size(); ++i) {
    auto &output_tensor = outputs[i];
    auto &output_node = output_edges[i].first;
    int output_idx = output_edges[i].second;
    GELOGD("[%s] Set output tensor[%zu] to [%s]'s output[%d], tensor = %s",
           graph_item_->GetName().c_str(),
           i,
           output_node->NodeName().c_str(),
           output_idx,
           output_tensor.DebugString().c_str());

    // Fixed message: this sets an OUTPUT tensor; it previously logged "input tensor".
    GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
                      "[%s] Failed to set output tensor[%zu]",
                      graph_item_->GetName().c_str(),
                      i);
  }

  GELOGD("Done enabling zero copy for outputs successfully.");
  return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 14
- 1
ge/hybrid/executor/subgraph_executor.h View File

@@ -43,7 +43,19 @@ class SubgraphExecutor {
* @param input_desc input tensor descriptions
* @return SUCCESS on success, error code otherwise
*/
Status ExecuteAsync(const std::vector<TensorValue> &inputs, const std::vector<ConstGeTensorDescPtr> &input_desc);
Status ExecuteAsync(const std::vector<TensorValue> &inputs,
const std::vector<ConstGeTensorDescPtr> &input_desc);

/**
* Execute subgraph async; the output tensor addresses (not the data) and the
* output tensor descriptions are valid after this method returns.
* @param inputs input tensors
* @param input_desc input tensor descriptions
* @param outputs user-provided output tensors; when non-empty, output
*                zero-copy is enabled so results are written into them
* @return SUCCESS on success, error code otherwise
*/
Status ExecuteAsync(const std::vector<TensorValue> &inputs,
const std::vector<ConstGeTensorDescPtr> &input_desc,
const std::vector<TensorValue> &outputs);

/**
* Execute subgraph async, output tensor address(not data) and output tensor descriptions are
@@ -75,6 +87,7 @@ class SubgraphExecutor {
Status GetOutputs(std::vector<TensorValue> &outputs, std::vector<ConstGeTensorDescPtr> &output_desc);

private:
Status EnableOutputZeroCopy(const std::vector<TensorValue> &outputs);
static Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state);
static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state);
Status Init(const std::vector<TensorValue> &inputs,


+ 6
- 2
ge/hybrid/model/hybrid_model.cc View File

@@ -40,9 +40,13 @@ HybridModel::~HybridModel() {
GELOGD("[%s] HybridModel destroyed.", model_name_.c_str());
}

// Initializes the hybrid model by running the appropriate builder.
// @param is_single_op  when true, build via BuildForSingleOp() (single-op
//                      execution path); otherwise run the full Build().
// @return SUCCESS on success, error code otherwise
// NOTE(review): the diff rendering had interleaved the old `Init()` signature
// and `Build()` call with the new code; this is the reconstructed post-merge body.
Status HybridModel::Init(bool is_single_op) {
  GELOGD("Start to init hybrid model.");
  if (is_single_op) {
    GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model.");
  } else {
    GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model.");
  }
  GELOGD("HybridModel initialized successfully.");
  return SUCCESS;
}


+ 1
- 1
ge/hybrid/model/hybrid_model.h View File

@@ -37,7 +37,7 @@ class HybridModel {

~HybridModel();

Status Init();
Status Init(bool is_single_op = false);

const NodeItem *GetNodeItem(const NodePtr &node) const;



+ 12
- 0
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -138,6 +138,18 @@ Status HybridModelBuilder::Build() {
return SUCCESS;
}

// Builds the hybrid model for the single-op execution path: validates the
// GeRootModel, names the model after the root graph, then indexes task
// definitions and loads the graph, weights and per-node tasks.
// The macro calls below are order-dependent: task defs must be indexed
// before LoadTasks() consumes them.
Status HybridModelBuilder::BuildForSingleOp() {
// ValidateParams() null-checks ge_root_model_ and its root graph before use.
GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName();
GELOGI("[%s] Start to build hybrid model.", GetGraphName());
GE_CHK_STATUS_RET(IndexTaskDefs(), "[%s] Failed to index task defs", GetGraphName());
GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName());
return SUCCESS;
}

Status HybridModelBuilder::ValidateParams() {
GE_CHECK_NOTNULL(ge_root_model_);
GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph());


+ 1
- 0
ge/hybrid/model/hybrid_model_builder.h View File

@@ -35,6 +35,7 @@ class HybridModelBuilder {
explicit HybridModelBuilder(HybridModel &hybrid_model);
~HybridModelBuilder() = default;
Status Build();
Status BuildForSingleOp();

private:
static Status UpdateAnchorStatus(const NodePtr &node);


+ 1
- 1
ge/hybrid/node_executor/task_context.cc View File

@@ -388,7 +388,7 @@ Status TaskContext::PropagateOutputs() {
subgraph_context_->all_inputs_[input_offset] = *tensor;
if (execution_context_->trace_enabled) {
subgraph_context_->all_inputs_[input_offset].SetName(
node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx));
dst_node_item ->NodeName() + "_in_" + std::to_string(dst_input_idx));
}
}
}


+ 19
- 2
ge/single_op/single_op.cc View File

@@ -254,10 +254,27 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
const vector<DataBuffer> &input_buffers,
vector<GeTensorDesc> &output_desc,
vector<DataBuffer> &output_buffers) {
GE_CHECK_NOTNULL(op_task_);
GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers));
std::lock_guard<std::mutex> lk(*stream_mutex_);
if (hybrid_model_executor_ != nullptr) {
GELOGD("Execute multi-task dynamic single op by hybrid model executor");
hybrid::HybridModelExecutor::ExecuteArgs args;
for (auto &input : input_buffers) {
args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length));
}
for (auto &output : output_buffers) {
args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length));
}
for (auto &tensor_desc : input_desc) {
auto desc = MakeShared<GeTensorDesc>(tensor_desc);
GE_CHECK_NOTNULL(desc);
args.input_desc.emplace_back(desc);
}

return hybrid_model_executor_->Execute(args);
}

std::lock_guard<std::mutex> lk(*stream_mutex_);
GE_CHECK_NOTNULL(op_task_);
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));
return SUCCESS;


+ 4
- 1
ge/single_op/single_op.h View File

@@ -28,6 +28,7 @@
#include "runtime/stream.h"
#include "task/op_task.h"
#include "cce/aicpu_engine_struct.h"
#include "hybrid/executor/hybrid_model_executor.h"

namespace ge {
class StreamResource;
@@ -46,7 +47,7 @@ class SingleOp {
Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);

friend class SingleOpModel;
StreamResource *stream_resource_;
StreamResource *stream_resource_ = nullptr;
std::mutex *stream_mutex_;
rtStream_t stream_ = nullptr;
std::vector<void *> input_addr_list_;
@@ -77,6 +78,8 @@ class DynamicSingleOp {
std::vector<DataBuffer> &outputs) const;

std::unique_ptr<OpTask> op_task_;
std::unique_ptr<hybrid::HybridModel> hybrid_model_;
std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
uintptr_t resource_id_ = 0;
std::mutex *stream_mutex_;
rtStream_t stream_ = nullptr;


+ 21
- 0
ge/single_op/single_op_model.cc View File

@@ -31,6 +31,8 @@
#include "task/aicpu_task_builder.h"
#include "task/aicpu_kernel_task_builder.h"
#include "task/tbe_task_builder.h"
#include "hybrid/executor/hybrid_model_executor.h"
#include "hybrid/node_executor/node_executor.h"

static std::atomic<std::uint64_t> aicpu_kernel_id(0);

@@ -477,6 +479,25 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
single_op.num_inputs_ = data_ops_.size();
single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));

auto ge_model = model_helper_.GetGeModel();
GE_CHECK_NOTNULL(ge_model);
if (ge_model->GetModelTaskDefPtr()->task_size() > 1) {
GELOGD("Build single op with multiple tasks");
GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(model_helper_.GetGeRootModel()));
GE_CHECK_NOTNULL(single_op.hybrid_model_);
GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "Failed to init hybrid model");
int32_t device_id = 0;
GE_CHK_RT_RET(rtGetDevice(&device_id));
single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
device_id,
resource.GetStream()));
GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model");
return SUCCESS;
}

return BuildTaskListForDynamicOp(single_op);
}
} // namespace ge

+ 4
- 0
ge/single_op/stream_resource.cc View File

@@ -61,6 +61,10 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) {
return it->second.get();
}

// Returns the runtime stream held by this resource (as last set via
// SetStream, or its initial value).
rtStream_t StreamResource::GetStream() const {
return stream_;
}

// Stores the runtime stream for later retrieval via GetStream().
// NOTE(review): does not take ownership of the stream — presumably the
// caller manages its lifetime; confirm against StreamResource's destructor.
void StreamResource::SetStream(rtStream_t stream) {
stream_ = stream;
}


+ 1
- 0
ge/single_op/stream_resource.h View File

@@ -37,6 +37,7 @@ class StreamResource {
StreamResource(StreamResource &&) = delete;
StreamResource &operator=(const StreamResource &) = delete;
StreamResource &operator=(StreamResource &&) = delete;
rtStream_t GetStream() const;
void SetStream(rtStream_t stream);

SingleOp *GetOperator(const void *key);


Loading…
Cancel
Save