
!14827 support the output address of graph reapply

From: @limingqi107
Reviewed-by: @cristoval, @wilfchen
Signed-off-by: @wilfchen
pull/14827/MERGE
mindspore-ci-bot committed 4 years ago
commit d17b4abb09
6 changed files with 117 additions and 40 deletions

1. mindspore/ccsrc/backend/session/anf_runtime_algorithm.h (+14, -1)
2. mindspore/ccsrc/backend/session/executor.cc (+1, -33)
3. mindspore/ccsrc/backend/session/gpu_session.cc (+43, -0)
4. mindspore/ccsrc/backend/session/gpu_session.h (+3, -0)
5. mindspore/ccsrc/backend/session/session_basic.cc (+53, -6)
6. mindspore/ccsrc/backend/session/session_basic.h (+3, -0)

mindspore/ccsrc/backend/session/anf_runtime_algorithm.h (+14, -1)

@@ -38,9 +38,22 @@
 namespace mindspore {
 namespace session {
 using AnfVisitFuncion = std::function<Any(const AnfNodePtr &node, int index)>;
-using KernelWithIndex = std::pair<AnfNodePtr, size_t>;
 using DeviceAddress = device::DeviceAddress;
 using DeviceAddressPtr = device::DeviceAddressPtr;
+
+using KernelWithIndex = std::pair<AnfNodePtr, size_t>;
+struct KernelWithIndexCmp {
+  bool operator()(const KernelWithIndex &key1, const KernelWithIndex &key2) const {
+    if (key1.first != key2.first) {
+      return key1.first < key2.first;
+    }
+    if (key1.second != key2.second) {
+      return key1.second < key2.second;
+    }
+    return false;
+  }
+};
 
 class AnfRuntimeAlgorithm {
  public:
  static AnfNodePtr MakeMonadValueNode(const KernelGraphPtr &kg);
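The KernelWithIndexCmp comparator added above gives (node, output index) pairs a strict weak ordering so they can serve as std::map keys; the KernelMapTensor alias introduced later in this change builds exactly such a map. Below is a minimal self-contained sketch of that behavior, using a shared_ptr<int> as a hypothetical stand-in for AnfNodePtr:

#include <cstddef>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <utility>

// Stand-ins for the real MindSpore types; hypothetical, for illustration only.
using AnfNodePtr = std::shared_ptr<int>;
using KernelWithIndex = std::pair<AnfNodePtr, std::size_t>;

// The same strict weak ordering the patch introduces: order by node first,
// then by output index, so (node, index) pairs can key a std::map.
struct KernelWithIndexCmp {
  bool operator()(const KernelWithIndex &key1, const KernelWithIndex &key2) const {
    if (key1.first != key2.first) {
      return key1.first < key2.first;
    }
    return key1.second < key2.second;
  }
};

int main() {
  auto node = std::make_shared<int>(0);
  std::map<KernelWithIndex, std::string, KernelWithIndexCmp> node_to_tensor;
  node_to_tensor[{node, 0}] = "tensor for output 0";
  node_to_tensor[{node, 0}] = "same key, reuses the slot";  // duplicate key collapses
  node_to_tensor[{node, 1}] = "tensor for output 1";        // different index, new entry
  std::cout << node_to_tensor.size() << std::endl;          // prints 2
  return 0;
}

Ordering by node first and index second means all outputs of one kernel sit next to each other in the map, and duplicate (node, index) keys collapse into a single entry, which is what the tensor-reuse logic later in this change relies on.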


mindspore/ccsrc/backend/session/executor.cc (+1, -33)

@@ -29,38 +29,6 @@ using mindspore::tensor::TensorPy;
 namespace mindspore {
 namespace session {
 namespace {
-void UpdateOutputTensors(const VectorRef *outputs,
-                         const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node) {
-  MS_EXCEPTION_IF_NULL(outputs);
-  for (auto &item : *outputs) {
-    if (utils::isa<VectorRefPtr>(item)) {
-      auto vector_ref = utils::cast<VectorRef>(item);
-      UpdateOutputTensors(&vector_ref, tensor_to_node);
-    } else if (utils::isa<tensor::TensorPtr>(item)) {
-      auto tensor = utils::cast<tensor::TensorPtr>(item);
-      MS_EXCEPTION_IF_NULL(tensor);
-      auto iter = tensor_to_node.find(tensor);
-      if (iter != tensor_to_node.end()) {
-        auto &node = iter->second.first;
-        auto &output_index = iter->second.second;
-        auto address = AnfAlgo::GetMutableOutputAddr(node, output_index);
-        tensor->set_device_address(address);
-        if (AnfAlgo::IsDynamicShape(node)) {
-          auto updated_shape = AnfAlgo::GetOutputInferShape(node, output_index);
-          ShapeVector int_shape;
-          std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt);
-          tensor->set_shape(int_shape);
-        }
-      }
-      if (tensor->NeedSyncDeviceToHostImmediately()) {
-        tensor->data_sync(false);
-        tensor->set_device_address(nullptr);
-        tensor->set_sync_status(kNeedSyncHostToDevice);
-      }
-    }
-  }
-}
-
 void SetOutputTensorsWaitStatus(const VectorRef *outputs) {
   MS_EXCEPTION_IF_NULL(outputs);
   for (auto &item : *outputs) {
@@ -163,7 +131,7 @@ void RunGraphTask::Run() {
   try {
     session_->LoadInputs(graph_id_, input_tensors_);
    session_->RunGraphImpl(graph_id_, input_tensors_, &outputs_);
-    UpdateOutputTensors(&outputs_, tensor_to_node_);
+    session_->UpdateOutputTensors(&outputs_, tensor_to_node_);
   } catch (const std::exception &e) {
     ExecutorManager::Instance().OnEvent(ExecutorEvent::kException);
     MsException::Instance().SetException();
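The file-local helper deleted above is not lost: the call site now dispatches through session_->UpdateOutputTensors(...), a virtual method added to SessionBasic in this change and overridden by GPUSession. A minimal sketch of that dispatch shape, with simplified, hypothetical signatures (the real methods take the outputs and the tensor-to-node map):

#include <iostream>

struct SessionBasic {
  virtual ~SessionBasic() = default;
  // Base implementation: bind device addresses to output tensors.
  virtual void UpdateOutputTensors() { std::cout << "generic update\n"; }
};

struct GPUSession : SessionBasic {
  // GPU override: also hand each graph output a fresh device address.
  void UpdateOutputTensors() override { std::cout << "GPU update + fresh device address\n"; }
};

// Stands in for RunGraphTask::Run(), which now calls through the session
// pointer instead of a file-local helper in executor.cc.
void RunGraphTask(SessionBasic *session) { session->UpdateOutputTensors(); }

int main() {
  GPUSession gpu;
  RunGraphTask(&gpu);  // prints "GPU update + fresh device address"
  return 0;
}

This keeps RunGraphTask backend-agnostic while letting the GPU backend insert its extra address-handover step.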


mindspore/ccsrc/backend/session/gpu_session.cc (+43, -0)

@@ -66,6 +66,7 @@
 #include "runtime/device/gpu/cuda_driver.h"
 #include "runtime/device/gpu/distribution/collective_init.h"
 #include "runtime/device/gpu/gpu_bucket.h"
+#include "runtime/device/gpu/gpu_device_address.h"
 #include "utils/ms_utils.h"
 #include "utils/config_manager.h"
 #include "utils/ms_context.h"
@@ -456,6 +457,48 @@ void GPUSession::ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph)
   }
 }
 
+void GPUSession::UpdateOutputTensors(const VectorRef *outputs,
+                                     const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node) {
+  MS_EXCEPTION_IF_NULL(outputs);
+  for (const auto &item : *outputs) {
+    if (utils::isa<VectorRefPtr>(item)) {
+      const auto &vector_ref = utils::cast<VectorRef>(item);
+      UpdateOutputTensors(&vector_ref, tensor_to_node);
+    } else if (utils::isa<tensor::TensorPtr>(item)) {
+      const auto &tensor = utils::cast<tensor::TensorPtr>(item);
+      MS_EXCEPTION_IF_NULL(tensor);
+      const auto &iter = tensor_to_node.find(tensor);
+      if (iter != tensor_to_node.end()) {
+        const auto &node = iter->second.first;
+        const auto &output_index = iter->second.second;
+        const auto &address = AnfAlgo::GetMutableOutputAddr(node, output_index);
+        // The outputs may share the same tensor, so skip a tensor whose device address has already been handed over.
+        if ((address == nullptr) || (address->GetPtr() == nullptr)) {
+          return;
+        }
+        tensor->set_device_address(address);
+
+        // Once the graph output's device address is handed to the tensor, the graph output must be given a new
+        // device address, so that the memory now held by the tensor is not rewritten in the next step or next loop.
+        auto new_address = std::make_shared<device::gpu::GPUDeviceAddress>(nullptr, address->GetSize());
+        AnfAlgo::SetOutputAddr(new_address, output_index, node.get());
+
+        if (AnfAlgo::IsDynamicShape(node)) {
+          const auto &updated_shape = AnfAlgo::GetOutputInferShape(node, output_index);
+          ShapeVector int_shape;
+          std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt);
+          tensor->set_shape(int_shape);
+        }
+      }
+      if (tensor->NeedSyncDeviceToHostImmediately()) {
+        tensor->data_sync(false);
+        tensor->set_device_address(nullptr);
+        tensor->set_sync_status(kNeedSyncHostToDevice);
+      }
+    }
+  }
+}
+
 void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const {
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
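The GPU override above carries the change the title describes: the output tensor keeps the device address that holds the computed result, and the graph output node is given a fresh, empty GPUDeviceAddress of the same size, so re-running the graph writes into new memory instead of clobbering the buffer the tensor now owns. A toy sketch of that handover, with hypothetical stand-in types in place of device::DeviceAddress and the graph structures:

#include <cstddef>
#include <iostream>
#include <memory>

// Hypothetical stand-ins, for illustration only.
struct DeviceAddress {
  void *ptr = nullptr;
  std::size_t size = 0;
};
using DeviceAddressPtr = std::shared_ptr<DeviceAddress>;

struct Tensor { DeviceAddressPtr device_address; };
struct GraphOutput { DeviceAddressPtr address; };

// The handover performed by the GPU override: the tensor keeps the address
// holding the result; the graph output gets a fresh empty address of the
// same size, so the next launch writes elsewhere.
void HandOverAddress(Tensor *tensor, GraphOutput *out) {
  tensor->device_address = out->address;       // tensor now owns the result buffer
  auto fresh = std::make_shared<DeviceAddress>();
  fresh->size = tensor->device_address->size;  // same size, not yet allocated
  out->address = fresh;                        // the next run fills this one instead
}

int main() {
  int payload = 42;  // pretend this is device memory filled by the kernel
  GraphOutput out{std::make_shared<DeviceAddress>(DeviceAddress{&payload, sizeof(payload)})};
  Tensor tensor;
  HandOverAddress(&tensor, &out);
  std::cout << (tensor.device_address->ptr == &payload) << " "  // 1: tensor holds the data
            << (out.address->ptr == nullptr) << std::endl;      // 1: graph output detached
  return 0;
}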


mindspore/ccsrc/backend/session/gpu_session.h (+3, -0)

@@ -20,6 +20,7 @@
 #include <memory>
 #include <algorithm>
 #include <string>
+#include <map>
 #include "backend/session/session_basic.h"
 #include "backend/session/kernel_graph.h"
 #include "backend/session/session_factory.h"
@@ -53,6 +54,8 @@ class GPUSession : public SessionBasic {
   std::string GetCommWorldGroup() override { return kNcclWorldGroup; }
   void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
                      const std::vector<tensor::TensorPtr> &inputs_const) const override;
+  void UpdateOutputTensors(const VectorRef *outputs,
+                           const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node) override;
 
  private:
   void SelectKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const;


mindspore/ccsrc/backend/session/session_basic.cc (+53, -6)

@@ -210,9 +210,11 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair,
 
 BaseRef CreateNodeOutputTensors(const AnfNodePtr &anf, const KernelGraphPtr &graph,
                                 const std::vector<tensor::TensorPtr> &input_tensors,
-                                std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
+                                std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node,
+                                KernelMapTensor *node_to_tensor) {
   MS_EXCEPTION_IF_NULL(anf);
   MS_EXCEPTION_IF_NULL(tensor_to_node);
+  MS_EXCEPTION_IF_NULL(node_to_tensor);
   MS_LOG(INFO) << "Create tensor for output[" << anf->DebugString() << "]";
   auto item_with_index = AnfAlgo::VisitKernelWithReturnType(anf, 0);
   MS_EXCEPTION_IF_NULL(item_with_index.first);
@@ -223,7 +225,7 @@ BaseRef CreateNodeOutputTensors(const AnfNodePtr &anf, const KernelGraphPtr &gra
     MS_EXCEPTION_IF_NULL(cnode);
     VectorRef ret;
     for (size_t i = 1; i < cnode->inputs().size(); ++i) {
-      auto out = CreateNodeOutputTensors(cnode->input(i), graph, input_tensors, tensor_to_node);
+      auto out = CreateNodeOutputTensors(cnode->input(i), graph, input_tensors, tensor_to_node, node_to_tensor);
       ret.push_back(out);
     }
     return ret;
@@ -233,7 +235,16 @@ BaseRef CreateNodeOutputTensors(const AnfNodePtr &anf, const KernelGraphPtr &gra
   if (size == 0) {
     return VectorRef();
   }
-  return CreateNodeOutputTensor(item_with_index, graph, input_tensors, tensor_to_node);
+
+  // Multiple graph outputs may refer to the same kernel node, so there is no need to create a new tensor each time.
+  const auto &iter = node_to_tensor->find(item_with_index);
+  if (iter != node_to_tensor->end()) {
+    return iter->second;
+  }
+
+  const auto &tensor = CreateNodeOutputTensor(item_with_index, graph, input_tensors, tensor_to_node);
+  (*node_to_tensor)[item_with_index] = tensor;
+  return tensor;
 }
 
 ValueNodePtr CreateNewValueNode(const AnfNodePtr &anf, KernelGraph *graph) {
@@ -1503,11 +1514,12 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr<KernelGraph> &kernel_grap
   MS_EXCEPTION_IF_NULL(kernel_graph);
   MS_EXCEPTION_IF_NULL(outputs);
   std::map<tensor::TensorPtr, session::KernelWithIndex> tensor_to_node;
+  KernelMapTensor node_to_tensor;
   auto anf_outputs = kernel_graph->outputs();
   for (auto &item : anf_outputs) {
     MS_EXCEPTION_IF_NULL(item);
     MS_LOG(INFO) << "Update output[" << item->DebugString() << "]";
-    outputs->emplace_back(CreateNodeOutputTensors(item, kernel_graph, input_tensors, &tensor_to_node));
+    outputs->emplace_back(CreateNodeOutputTensors(item, kernel_graph, input_tensors, &tensor_to_node, &node_to_tensor));
   }
 
   auto ms_context = MsContext::GetInstance();
@@ -1572,10 +1584,44 @@ void SessionBasic::CreateOutputTensors(const GraphId &graph_id, const std::vecto
   MS_EXCEPTION_IF_NULL(outputs);
   MS_EXCEPTION_IF_NULL(tensor_to_node);
   auto anf_outputs = kernel_graph->outputs();
+  KernelMapTensor node_to_tensor;
   for (auto &item : anf_outputs) {
     MS_EXCEPTION_IF_NULL(item);
     MS_LOG(INFO) << "Create node output[" << item->DebugString() << "]";
-    outputs->emplace_back(CreateNodeOutputTensors(item, kernel_graph, input_tensors, tensor_to_node));
+    outputs->emplace_back(CreateNodeOutputTensors(item, kernel_graph, input_tensors, tensor_to_node, &node_to_tensor));
   }
 }
 
+void SessionBasic::UpdateOutputTensors(const VectorRef *outputs,
+                                       const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node) {
+  MS_EXCEPTION_IF_NULL(outputs);
+  for (const auto &item : *outputs) {
+    if (utils::isa<VectorRefPtr>(item)) {
+      const auto &vector_ref = utils::cast<VectorRef>(item);
+      UpdateOutputTensors(&vector_ref, tensor_to_node);
+    } else if (utils::isa<tensor::TensorPtr>(item)) {
+      const auto &tensor = utils::cast<tensor::TensorPtr>(item);
+      MS_EXCEPTION_IF_NULL(tensor);
+      const auto &iter = tensor_to_node.find(tensor);
+      if (iter != tensor_to_node.end()) {
+        const auto &node = iter->second.first;
+        const auto &output_index = iter->second.second;
+        const auto &address = AnfAlgo::GetMutableOutputAddr(node, output_index);
+        tensor->set_device_address(address);
+
+        if (AnfAlgo::IsDynamicShape(node)) {
+          const auto &updated_shape = AnfAlgo::GetOutputInferShape(node, output_index);
+          ShapeVector int_shape;
+          std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt);
+          tensor->set_shape(int_shape);
+        }
+      }
+      if (tensor->NeedSyncDeviceToHostImmediately()) {
+        tensor->data_sync(false);
+        tensor->set_device_address(nullptr);
+        tensor->set_sync_status(kNeedSyncHostToDevice);
+      }
+    }
+  }
+}
 
@@ -1622,11 +1668,12 @@ void SessionBasic::GetModelOutputsInfo(uint32_t graph_id, std::vector<tensor::Te
 
   VectorRef vector_outputs;
   std::map<tensor::TensorPtr, session::KernelWithIndex> tensor_to_node;
+  KernelMapTensor node_to_tensor;
   auto anf_outputs = kernel_graph->outputs();
   for (auto &item : anf_outputs) {
     MS_EXCEPTION_IF_NULL(item);
     MS_LOG(INFO) << "Create node output[" << item->DebugString() << "]";
-    vector_outputs.emplace_back(CreateNodeOutputTensors(item, kernel_graph, inputs, &tensor_to_node));
+    vector_outputs.emplace_back(CreateNodeOutputTensors(item, kernel_graph, inputs, &tensor_to_node, &node_to_tensor));
   }
   *outputs = TransformVectorRefToMultiTensor(vector_outputs);
   for (size_t i = 0; i < outputs->size(); i++) {
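The other half of the change is the node_to_tensor cache threaded through CreateNodeOutputTensors above: when several graph outputs resolve to the same (kernel, index) pair, they now share one tensor instead of each getting a fresh one, which is what makes the single address handover in GPUSession safe. A small sketch of the cache-first pattern, with simplified stand-in types:

#include <cstddef>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <utility>

// Simplified, hypothetical stand-ins: a graph output identified by a
// (node, index) pair, and a cache returning the same tensor for repeats.
using KernelWithIndex = std::pair<int, std::size_t>;
using TensorPtr = std::shared_ptr<std::string>;
using KernelMapTensor = std::map<KernelWithIndex, TensorPtr>;

TensorPtr CreateNodeOutputTensor(const KernelWithIndex &key) {
  return std::make_shared<std::string>("tensor for output " + std::to_string(key.second));
}

// Mirrors the patched CreateNodeOutputTensors: look up (node, index) first and
// only create a tensor on a cache miss, so duplicate outputs share one tensor.
TensorPtr CreateNodeOutputTensors(const KernelWithIndex &key, KernelMapTensor *node_to_tensor) {
  const auto iter = node_to_tensor->find(key);
  if (iter != node_to_tensor->end()) {
    return iter->second;  // same kernel output requested again: reuse
  }
  auto tensor = CreateNodeOutputTensor(key);
  (*node_to_tensor)[key] = tensor;
  return tensor;
}

int main() {
  KernelMapTensor cache;
  auto a = CreateNodeOutputTensors({1, 0}, &cache);
  auto b = CreateNodeOutputTensors({1, 0}, &cache);
  std::cout << (a == b) << std::endl;  // prints 1: one tensor shared by both outputs
  return 0;
}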


mindspore/ccsrc/backend/session/session_basic.h (+3, -0)

@@ -73,6 +73,7 @@ struct OutputTensorInfo {
 };
 
 using OpRunInfoPtr = std::shared_ptr<OpRunInfo>;
+using KernelMapTensor = std::map<session::KernelWithIndex, BaseRef, session::KernelWithIndexCmp>;
 class Executor;
 
 class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
@@ -169,6 +170,8 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
   virtual void CreateOutputTensors(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &input_tensors,
                                    VectorRef *outputs,
                                    std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node);
+  virtual void UpdateOutputTensors(const VectorRef *outputs,
+                                   const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node);
   virtual void UnifyMindIR(const KernelGraphPtr &graph) {}
   virtual GraphId CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) { return 0; }
   virtual GraphId CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) { return kInvalidGraphId; }

