From a2cdf589e723563036b4c8d34f7a7b283b7d99ce Mon Sep 17 00:00:00 2001
From: zhaozhenlong
Date: Tue, 27 Apr 2021 17:30:10 +0800
Subject: [PATCH] fix npu reuse input tensor

---
 .../lite/src/runtime/agent/npu/npu_executor.cc | 18 ++++++++++++++----
 .../lite/src/runtime/agent/npu/npu_executor.h  |  2 +-
 .../runtime/agent/npu/subgraph_npu_kernel.cc   |  2 +-
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
index 384e61b71a..1389eba21b 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
@@ -15,6 +15,7 @@
  */
 
 #include "src/runtime/agent/npu/npu_executor.h"
+#include <unordered_map>
 #include "include/errorcode.h"
 #include "src/runtime/agent/npu/npu_manager.h"
 #include "nnacl/pack.h"
@@ -100,15 +101,24 @@ bool IsSameShapeOutTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_te
 }
 
 int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
-                     const std::vector<kernel::LiteKernel *> &out_kernels,
+                     const std::vector<kernel::LiteKernel *> &in_kernels,
                      const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
                      const KernelCallBack &before, const KernelCallBack &after) {
   hiai::AiContext context;
-  std::vector<bool> inputs_visited(in_tensors.size(), false);
+  std::unordered_map<Tensor *, int> tensor_uses;
+  for (const auto ker : in_kernels) {
+    for (const auto ker_input : ker->in_tensors()) {
+      if (tensor_uses.find(ker_input) == tensor_uses.end()) {
+        tensor_uses.insert({ker_input, 1});
+      } else {
+        tensor_uses[ker_input]++;
+      }
+    }
+  }
   for (int i = 0; i < npu_input_tensors_.size(); ++i) {
     int index = 0;
     for (; index < in_tensors.size(); index++) {
-      if (!inputs_visited[index] && IsSameShapeInTensor(in_tensors[index], npu_input_tensors_[i])) {
+      if (tensor_uses[in_tensors[index]] > 0 && IsSameShapeInTensor(in_tensors[index], npu_input_tensors_[i])) {
         void *data = in_tensors[index]->data_c();
         if (data == nullptr) {
           MS_LOG(ERROR) << "For " << model_name_ << ", the " << i << "th input data is nullptr";
@@ -116,7 +126,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
         }
 
         memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[index]->Size());
-        inputs_visited[index] = true;
+        tensor_uses[in_tensors[index]]--;
         in_tensors[index]->DecRefCount();
         break;
       }
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.h b/mindspore/lite/src/runtime/agent/npu/npu_executor.h
index ad325f2934..dc65001ffc 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.h
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.h
@@ -36,7 +36,7 @@ class NPUExecutor : public Executor {
   int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
 
   int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
-          const std::vector<kernel::LiteKernel *> &out_kernels, const std::vector<kernel::LiteKernel *> &kernels,
+          const std::vector<kernel::LiteKernel *> &in_kernels, const std::vector<kernel::LiteKernel *> &kernels,
           Allocator *allocator = nullptr, const KernelCallBack &before = nullptr,
           const KernelCallBack &after = nullptr);
 
diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
index 468ac2aa24..15a3da9138 100644
--- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
+++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
@@ -90,7 +90,7 @@ std::shared_ptr<domi::ModelBufferData> SubGraphNpuKernel::BuildIRModel() {
 
 int SubGraphNpuKernel::Run() {
   return reinterpret_cast<lite::NPUExecutor *>(this->executor_)
-    ->Run(in_tensors_, out_tensor_sorted_, out_nodes_, nodes_);
+    ->Run(in_tensors_, out_tensor_sorted_, in_nodes_, nodes_);
 }
 
 int SubGraphNpuKernel::BuildNPUInputOp() {
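
Note on the change: the old Run() marked each lite input tensor as visited after its first match, so a tensor could fill at most one NPU input buffer and any further consumer of the same tensor was skipped. The patch instead counts, per tensor, how many in-kernels consume it and decrements that count on each copy, so a reused input tensor stays eligible until all of its consumers have been served. Below is a minimal standalone sketch of the same counting pattern; FakeTensor and FakeKernel are illustrative placeholders, not the MindSpore Lite Tensor/LiteKernel classes.

    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    // Illustrative placeholder types (not MindSpore Lite classes).
    struct FakeTensor {
      int id;
    };

    struct FakeKernel {
      std::vector<FakeTensor *> inputs;
    };

    int main() {
      FakeTensor a{0};
      FakeTensor b{1};
      FakeKernel k1{{&a, &b}};
      FakeKernel k2{{&a}};  // tensor `a` is consumed by a second kernel
      std::vector<FakeKernel *> in_kernels = {&k1, &k2};

      // Count how many consumers each input tensor has, mirroring the
      // tensor_uses map the patch builds from in_kernels.
      std::unordered_map<FakeTensor *, int> tensor_uses;
      for (const auto ker : in_kernels) {
        for (const auto ker_input : ker->inputs) {
          ++tensor_uses[ker_input];  // operator[] value-initializes missing counts to 0
        }
      }

      // A tensor remains a candidate for filling an NPU input buffer while its
      // count is positive; each copy would then decrement it (tensor_uses[t]--).
      for (const auto &entry : tensor_uses) {
        std::printf("tensor %d: %d use(s)\n", entry.first->id, entry.second);
      }
      return 0;
    }

The diff leaves the shape matching (IsSameShapeInTensor) and the per-copy DecRefCount untouched; only the bookkeeping that decides whether a tensor may still be matched changes.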