Browse Source

!16570 gpu inference

From: @wilfchen
Reviewed-by: @limingqi107,@cristoval
Signed-off-by: @cristoval
tags/v1.3.0
mindspore-ci-bot Gitee 4 years ago
parent
commit
ac9754b7c8
7 changed files with 84 additions and 3 deletions
  1. +2
    -1
      mindspore/ccsrc/backend/optimizer/trt_pass/trt_op_factory.h
  2. +55
    -0
      mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
  3. +2
    -0
      mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
  4. +3
    -0
      mindspore/ccsrc/backend/session/kernel_graph.h
  5. +21
    -1
      mindspore/ccsrc/cxx_api/graph/gpu/gpu_graph_impl.cc
  6. +1
    -0
      mindspore/ccsrc/cxx_api/graph/gpu/gpu_graph_impl.h
  7. +0
    -1
      mindspore/ccsrc/runtime/device/gpu/trt_loader.cc

+ 2
- 1
mindspore/ccsrc/backend/optimizer/trt_pass/trt_op_factory.h View File

@@ -50,7 +50,8 @@ class TrtOpFactory {
// Look up the TensorRT converter registered for `op_name`.
// Returns nullptr (with a warning) when the operator has no TRT converter,
// letting the caller fall back to the native backend instead of aborting.
ConvertFunc GetConvertFunc(const std::string &op_name) const {
auto iter = op_convert_map_.find(op_name);
if (iter == op_convert_map_.end()) {
// Unsupported op is not fatal here: report and let the caller decide.
MS_LOG(WARNING) << "Operator: " << op_name << " not support.";
return nullptr;
}
return iter->second;
}


+ 55
- 0
mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc View File

@@ -107,6 +107,53 @@ std::vector<size_t> TransShapeToSizet(const abstract::ShapePtr &shape) {
}

enum ShapeType { kMaxShape, kMinShape };

// Resolve `node`'s output at `output_index` down to the "real" producing
// node(s), appending each (node, index) pair to `inputs`. Transparent
// wrappers are traversed: Depend/Load/UpdateState are skipped, TupleGetItem
// is peeled (and a MakeTuple+TupleGetItem pair is cancelled out), and a
// MakeTuple output is flattened into its elements.
void GetRealOutputRecursively(const AnfNodePtr &node, size_t output_index,
std::vector<session::KernelWithIndex> *inputs) {
MS_EXCEPTION_IF_NULL(node);
// Leaves: constants and parameters are already real outputs (index 0).
if (node->isa<ValueNode>() || node->isa<Parameter>()) {
return inputs->push_back(std::make_pair(node, 0));
}

// Skip control node
// Depend/Load/UpdateState only order execution; follow their real input.
// NOTE(review): Load/UpdateState are recursed via kRealInputIndexInDepend —
// assumes the real-data input sits at the same index for all three prims.
if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimDepend) || AnfAlgo::CheckPrimitiveType(node, prim::kPrimLoad) ||
AnfAlgo::CheckPrimitiveType(node, prim::kPrimUpdateState)) {
return GetRealOutputRecursively(node->cast<CNodePtr>()->input(kRealInputIndexInDepend), 0, inputs);
}

// Bypass TupleGetItem
if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimTupleGetItem)) {
auto tuple_get_item = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(tuple_get_item);
auto input = AnfAlgo::GetTupleGetItemRealInput(tuple_get_item);
auto index = AnfAlgo::GetTupleGetItemOutIndex(tuple_get_item);

// Conceal MakeTuple + TupleGetItem pair.
// TupleGetItem(MakeTuple(...), i) is the i-th MakeTuple input directly.
if (AnfAlgo::CheckPrimitiveType(input, prim::kPrimMakeTuple)) {
auto make_tuple = input->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(make_tuple);
auto real_input = AnfAlgo::GetInputNode(make_tuple, index);
return GetRealOutputRecursively(real_input, 0, inputs);
}

// Skip TupleGetItem.
return GetRealOutputRecursively(input, index, inputs);
}

// Flatten MakeTuple inputs.
// Each element of the tuple contributes its own real output(s), in order.
if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimMakeTuple)) {
auto make_tuple = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(make_tuple);
size_t input_num = AnfAlgo::GetInputTensorNum(make_tuple);
for (size_t input_index = 0; input_index < input_num; ++input_index) {
auto input_node = AnfAlgo::GetInputNode(make_tuple, input_index);
GetRealOutputRecursively(input_node, 0, inputs);
}
return;
}

// Any other node is a real kernel: record it with the requested index.
return inputs->push_back(std::make_pair(node, output_index));
}
} // namespace

AnfNodePtr AnfRuntimeAlgorithm::MakeMonadValueNode(const KernelGraphPtr &kg) {
@@ -1956,5 +2003,13 @@ AnfNodeIndexSet AnfRuntimeAlgorithm::GetUpdateStateUsers(const FuncGraphManagerP
}
return update_states;
}

void AnfRuntimeAlgorithm::GetRealInputs(const AnfNodePtr &node, std::vector<session::KernelWithIndex> *inputs) {
size_t input_num = AnfAlgo::GetInputTensorNum(node);
for (size_t input_index = 0; input_index < input_num; ++input_index) {
auto input_node = AnfAlgo::GetInputNode(node->cast<CNodePtr>(), input_index);
GetRealOutputRecursively(input_node, 0, inputs);
}
}
} // namespace session
} // namespace mindspore

+ 2
- 0
mindspore/ccsrc/backend/session/anf_runtime_algorithm.h View File

@@ -273,6 +273,8 @@ class AnfRuntimeAlgorithm {
std::set<AnfNodePtr> *visited);
static void InsertMakeTupleForOutput(NotNull<KernelGraphPtr> root_graph);
static AnfNodeIndexSet GetUpdateStateUsers(const FuncGraphManagerPtr &manager, const AnfNodePtr &node);
// Get node real inputs, skip `MakeTuple`, `TupleGetItem`, `Depend`, `Load`, `UpdateState` etc.
static void GetRealInputs(const AnfNodePtr &anf_node, std::vector<session::KernelWithIndex> *inputs);
};
} // namespace session
using AnfAlgo = session::AnfRuntimeAlgorithm;


+ 3
- 0
mindspore/ccsrc/backend/session/kernel_graph.h View File

@@ -98,6 +98,9 @@ class KernelGraph : public FuncGraph {

const std::vector<AnfNodePtr> &inputs() const;
std::vector<AnfNodePtr> *MutableInputs() const { return inputs_.get(); }
// Replace the graph's input list wholesale (takes a copy of `inputs`).
void SetGraphInputs(const std::vector<AnfNodePtr> &inputs) {
inputs_ = std::make_shared<std::vector<AnfNodePtr>>(inputs);
}
void ReplaceGraphInput(const AnfNodePtr &old_parameter, const AnfNodePtr &new_parameter);
std::vector<AnfNodePtr> outputs() const;
CNodePtr NewCNode(const std::vector<AnfNodePtr> &inputs) override;


+ 21
- 1
mindspore/ccsrc/cxx_api/graph/gpu/gpu_graph_impl.cc View File

@@ -23,6 +23,7 @@
#include "backend/session/session_factory.h"
#include "backend/session/executor_manager.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "runtime/device/gpu/cuda_driver.h"

namespace mindspore {
API_FACTORY_REG(GraphCell::GraphImpl, GPU, GPUGraphImpl);
@@ -36,7 +37,8 @@ GPUGraphImpl::GPUGraphImpl()
input_names_(),
output_names_(),
init_flag_(false),
load_flag_(false) {}
load_flag_(false),
set_device_id_flag_(false) {}

Status GPUGraphImpl::InitEnv() {
if (init_flag_) {
@@ -55,6 +57,13 @@ Status GPUGraphImpl::InitEnv() {
ms_context->set_param<uint32_t>(MS_CTX_DEVICE_ID, device_id_);
ms_context->set_param<std::string>(MS_CTX_DEVICE_TARGET, kGPUDevice);

// Set device id for sync data to host as cudaSetDevice is thread level config.
bool ret = device::gpu::CudaDriver::SetDevice(UintToInt(device_id_));
if (!ret) {
MS_LOG(ERROR) << "Failed to set device id:" << device_id_;
return kMCDeviceError;
}

auto &device_infos = graph_context_->MutableDeviceInfo();
if (device_infos.size() != 1) {
return kMCDeviceError;
@@ -194,6 +203,17 @@ Status GPUGraphImpl::Run(const std::vector<MSTensor> &inputs, std::vector<MSTens
}
}

// The `Load()` and `Run()` running in two threads. `Run()` always running in same thread.
// It should set device id once.
if (!set_device_id_flag_) {
bool ret = device::gpu::CudaDriver::SetDevice(UintToInt(device_id_));
if (!ret) {
MS_LOG(ERROR) << "Failed to set device id:" << device_id_;
return kMCDeviceError;
}
set_device_id_flag_ = true;
}

if (inputs.size() != inputs_info_.size()) {
MS_LOG(ERROR) << "inputs count not match, required count " << inputs_info_.size() << ", given count "
<< inputs.size();


+ 1
- 0
mindspore/ccsrc/cxx_api/graph/gpu/gpu_graph_impl.h View File

@@ -57,6 +57,7 @@ class GPUGraphImpl : public GraphCell::GraphImpl {
std::vector<std::string> output_names_;
bool init_flag_;
bool load_flag_;
bool set_device_id_flag_;

// tensor-rt
uint32_t batch_size_;


+ 0
- 1
mindspore/ccsrc/runtime/device/gpu/trt_loader.cc View File

@@ -30,7 +30,6 @@ TrtLoader::TrtLoader()
if (nvinfer_handle_ == nullptr) {
MS_LOG(WARNING) << "Can not open libnvinfer.so. " << dlerror()
<< ". Install Tensor-RT and export LD_LIBRARY_PATH=${TENSORRT_HOME}/lib:$LD_LIBRARY_PATH.";
MS_LOG(WARNING) << "Inference with native backend.";
return;
}



Loading…
Cancel
Save