
fix npu memleak: delete op const

npu manager is no longer a singleton

tags/v1.2.0-rc1
zhaozhenlong · 4 years ago · commit 2facf1ba07
26 changed files with 240 additions and 81 deletions

 1. +4  -1   mindspore/lite/src/CMakeLists.txt
 2. +14 -1   mindspore/lite/src/inner_context.cc
 3. +12 -1   mindspore/lite/src/inner_context.h
 4. +30 -5   mindspore/lite/src/lite_session.cc
 5. +8  -0   mindspore/lite/src/lite_session.h
 6. +8  -1   mindspore/lite/src/runtime/agent/npu/npu_executor.cc
 7. +6  -1   mindspore/lite/src/runtime/agent/npu/npu_executor.h
 8. +22 -12  mindspore/lite/src/runtime/agent/npu/npu_manager.cc
 9. +4  -7   mindspore/lite/src/runtime/agent/npu/npu_manager.h
10. +16 -8   mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
11. +11 -3   mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h
12. +7  -3   mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.cc
13. +1  -0   mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.h
14. +21 -13  mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.cc
15. +2  -0   mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.h
16. +2  -0   mindspore/lite/src/runtime/kernel/npu/npu_kernel.h
17. +14 -6   mindspore/lite/src/runtime/kernel/npu/pad_npu.cc
18. +2  -0   mindspore/lite/src/runtime/kernel/npu/pad_npu.h
19. +8  -4   mindspore/lite/src/runtime/kernel/npu/resize_npu.cc
20. +1  -0   mindspore/lite/src/runtime/kernel/npu/resize_npu.h
21. +14 -6   mindspore/lite/src/runtime/kernel/npu/split_npu.cc
22. +2  -0   mindspore/lite/src/runtime/kernel/npu/split_npu.h
23. +7  -3   mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.cc
24. +1  -0   mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.h
25. +7  -6   mindspore/lite/src/scheduler.cc
26. +16 -0   mindspore/lite/src/scheduler.h
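
Before the per-file diffs, a note on the two problems this commit fixes. First, several NPU kernels allocated hiai::op::Const operators into function locals inside SetNPUInputs and never released them; the fix promotes each local to a member (reshape_op_, gamma_, beta_, hiai_paddings_, hiai_constant_, out_size_, size_splits_, split_dim_, axis_const_) and deletes it in the kernel destructor. Second, NPUManager::GetInstance() is removed; each LiteSession now owns its own manager and threads it through InnerContext, Scheduler, SubGraphNpuKernel, and NPUExecutor. A minimal before/after sketch of the leak fix, using a hypothetical OpConst stand-in for hiai::op::Const:

#include <new>
#include <string>

struct OpConst {
  explicit OpConst(std::string name) : name_(std::move(name)) {}
  std::string name_;
};

class KernelSketch {
 public:
  // Before the fix this allocation lived in a local variable and leaked
  // once SetNPUInputs returned; now the pointer is kept as a member.
  int SetNPUInputs() {
    axis_const_ = new (std::nothrow) OpConst("unsqueeze_axis");
    return axis_const_ == nullptr ? -1 : 0;  // RET_ERROR / RET_OK
  }

  ~KernelSketch() {
    delete axis_const_;  // the leak fix: release the Const with the kernel
    axis_const_ = nullptr;
  }

 private:
  OpConst *axis_const_ = nullptr;
};

int main() {
  KernelSketch kernel;
  return kernel.SetNPUInputs();
}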

mindspore/lite/src/CMakeLists.txt (+4 -1)

@@ -28,6 +28,10 @@ set(API_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/tensor_impl.cc
 )
 
+if(SUPPORT_NPU)
+    include_directories(${DDK_PATH})
+endif()
+
 set(LITE_SRC
     ${API_SRC}
     ${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc
@@ -156,7 +160,6 @@ else()
 endif()
 if(SUPPORT_NPU)
     add_subdirectory(runtime/agent/npu)
-    include_directories(${DDK_PATH})
     target_link_libraries(mindspore-lite npu_kernel_mid)
     target_link_libraries(mindspore-lite_static npu_kernel_mid)
 endif()


mindspore/lite/src/inner_context.cc (+14 -1)

@@ -31,6 +31,18 @@ InnerContext::InnerContext(const Context *context) {
   }
 }
 
+#if SUPPORT_NPU
+InnerContext::InnerContext(const Context *context, NPUManager *npu_manager) {
+  this->allocator = context->allocator;
+  this->thread_num_ = context->thread_num_;
+  this->device_list_.clear();
+  for (auto &device_ctx : context->device_list_) {
+    this->device_list_.push_back(device_ctx);
+  }
+  this->npu_manager_ = npu_manager;
+}
+#endif
+
 int InnerContext::Init() {
   if (RET_OK != this->IsValid()) {
     MS_LOG(ERROR) << "Context is not valid";
@@ -120,10 +132,11 @@ bool InnerContext::IsGpuEnabled() const {
 
 bool InnerContext::IsNpuEnabled() const {
 #ifdef SUPPORT_NPU
+  MS_ASSERT(npu_manager_ != nullptr);
   return this->device_list_.end() !=
            std::find_if(this->device_list_.begin(), this->device_list_.end(),
                         [](const DeviceContext &device) { return device.device_type_ == DT_NPU; }) &&
-         mindspore::lite::NPUManager::GetInstance()->IsSupportNPU();
+         npu_manager_->IsSupportNPU();
 #else
   return false;
 #endif

mindspore/lite/src/inner_context.h (+12 -1)

@@ -20,6 +20,9 @@
 #include "include/context.h"
 #include "src/runtime/runtime_api.h"
 #include "src/runtime/allocator.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore::lite {
 struct InnerContext : public Context {
@@ -30,7 +33,9 @@ struct InnerContext : public Context {
   InnerContext() = default;
 
   explicit InnerContext(const Context *context);
-
+#if SUPPORT_NPU
+  InnerContext(const Context *context, NPUManager *npu_manager);
+#endif
   int Init();
 
   bool IsCpuFloat16Enabled() const;
@@ -52,6 +57,12 @@ struct InnerContext : public Context {
   int IsValid() const;
 
   virtual ~InnerContext();
+
+#if SUPPORT_NPU
+
+ private:
+  NPUManager *npu_manager_ = nullptr;
+#endif
 };
 }  // namespace mindspore::lite




mindspore/lite/src/lite_session.cc (+30 -5)

@@ -395,7 +395,11 @@ int LiteSession::CompileGraph(Model *model) {
     return ret;
   }
   // scheduler kernels
+#if SUPPORT_NPU
+  Scheduler scheduler(context_, model, &tensors_, npu_manager_, npu_pass_manager_);
+#else
   Scheduler scheduler(context_, model, &tensors_);
+#endif
   ret = scheduler.Schedule(&kernels_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
@@ -404,7 +408,8 @@ int LiteSession::CompileGraph(Model *model) {
   }
 #if SUPPORT_NPU
   if (this->context_->IsNpuEnabled()) {
-    if (mindspore::lite::NPUManager::GetInstance()->LoadOMModel() != RET_OK) {
+    MS_ASSERT(npu_manager_ != nullptr);
+    if (npu_manager_->LoadOMModel() != RET_OK) {
       MS_LOG(ERROR) << "NPU client load model failed.";
       return RET_ERROR;
     }
@@ -501,14 +506,30 @@ int LiteSession::Init(const Context *context) {
     MS_LOG(ERROR) << "Not support multi-threading";
     return RET_ERROR;
   }
+
+#if SUPPORT_NPU
+  npu_manager_ = new (std::nothrow) NPUManager();
+  if (npu_manager_ == nullptr) {
+    MS_LOG(ERROR) << "New npu_manager_ failed";
+    is_running_.store(false);
+    return RET_ERROR;
+  }
+  npu_pass_manager_ = new (std::nothrow) NPUPassManager();
+  if (npu_pass_manager_ == nullptr) {
+    MS_LOG(ERROR) << "New npu_pass_manager_ failed";
+    is_running_.store(false);
+    return RET_ERROR;
+  }
+#endif
   if (context == nullptr) {
     MS_LOG(ERROR) << "context is nullptr";
     is_running_.store(false);
     return RET_NULL_PTR;
   }
+#if SUPPORT_NPU
+  this->context_ = new (std::nothrow) InnerContext(context, npu_manager_);
+#else
   this->context_ = new (std::nothrow) InnerContext(context);
+#endif
   if (this->context_ == nullptr) {
     MS_LOG(ERROR) << "New Context failed";
     is_running_.store(false);
@@ -579,8 +600,12 @@ LiteSession::~LiteSession() {
   delete this->executor_;
   this->executor_ = nullptr;
 #if SUPPORT_NPU
-  mindspore::lite::NPUPassManager::GetInstance()->Clear();
-  mindspore::lite::NPUManager::GetInstance()->Reset();
+  MS_ASSERT(npu_manager_ != nullptr);
+  MS_ASSERT(npu_pass_manager_ != nullptr);
+  npu_pass_manager_->Clear();
+  delete npu_pass_manager_;
+  npu_manager_->Reset();
+  delete npu_manager_;
 #endif
 #if GPU_OPENCL && !SUPPORT_TRAIN
   delete opencl_runtime_wrapper_;
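
Taken together, the lite_session changes pin the NPU manager's lifetime to the session: created in Init, injected into the context and scheduler, released in the destructor. A condensed sketch of that lifecycle, with hypothetical stubs in place of mindspore::lite::NPUManager, NPUPassManager, and InnerContext:

#include <new>

struct NPUManagerStub {
  void Reset() {}  // stands in for unloading OM models and clients
};
struct NPUPassManagerStub {
  void Clear() {}
};
struct InnerContextStub {
  explicit InnerContextStub(NPUManagerStub *m) : npu_manager_(m) {}
  NPUManagerStub *npu_manager_;  // borrowed, not owned
};

class SessionSketch {
 public:
  int Init() {
    // One manager per session replaces NPUManager::GetInstance().
    npu_manager_ = new (std::nothrow) NPUManagerStub();
    npu_pass_manager_ = new (std::nothrow) NPUPassManagerStub();
    if (npu_manager_ == nullptr || npu_pass_manager_ == nullptr) return -1;
    context_ = new (std::nothrow) InnerContextStub(npu_manager_);
    return context_ == nullptr ? -1 : 0;
  }

  ~SessionSketch() {
    delete context_;
    if (npu_pass_manager_ != nullptr) {
      npu_pass_manager_->Clear();
      delete npu_pass_manager_;
    }
    if (npu_manager_ != nullptr) {
      npu_manager_->Reset();  // unload before the manager itself goes away
      delete npu_manager_;
    }
  }

 private:
  NPUManagerStub *npu_manager_ = nullptr;
  NPUPassManagerStub *npu_pass_manager_ = nullptr;
  InnerContextStub *context_ = nullptr;
};

int main() {
  SessionSketch session;
  return session.Init();
}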


mindspore/lite/src/lite_session.h (+8 -0)

@@ -31,6 +31,10 @@
 #include "src/executor.h"
 #include "src/tensor.h"
 #include "src/tensorlist.h"
+#if SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
+#endif
 #if GPU_OPENCL
 #include "src/runtime/gpu/opencl/opencl_runtime.h"
 #elif GPU_VULKAN
@@ -129,6 +133,10 @@ class LiteSession : public session::LiteSession {
   Executor *executor_ = nullptr;
   Model *model_ = nullptr;
   std::atomic<bool> is_running_ = false;
+#if SUPPORT_NPU
+  NPUManager *npu_manager_ = nullptr;
+  NPUPassManager *npu_pass_manager_ = nullptr;
+#endif
 #if GPU_OPENCL && !SUPPORT_TRAIN
   opencl::OpenCLRuntimeWrapper *opencl_runtime_wrapper_{nullptr};
 #elif GPU_VULKAN && !SUPPORT_TRAIN


mindspore/lite/src/runtime/agent/npu/npu_executor.cc (+8 -1)

@@ -21,12 +21,19 @@
 namespace mindspore::lite {
 NPUExecutor::~NPUExecutor() {
   client_.reset();
+  for (auto t : npu_input_tensors_) {
+    t.reset();
+  }
   npu_input_tensors_.clear();
+  for (auto t : npu_output_tensors_) {
+    t.reset();
+  }
   npu_output_tensors_.clear();
 }
 
 int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
-  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
+  MS_ASSERT(npu_manager_ != nullptr);
+  this->client_ = npu_manager_->GetClient(model_name_);
   if (this->client_ == nullptr) {
     MS_LOG(ERROR) << "client is nullptr.";
     return RET_ERROR;


mindspore/lite/src/runtime/agent/npu/npu_executor.h (+6 -1)

@@ -23,11 +23,15 @@
 #include "src/executor.h"
 #include "include/errorcode.h"
 #include "include/HiAiModelManagerService.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore::lite {
 class NPUExecutor : public Executor {
  public:
-  explicit NPUExecutor(const std::string &model_name) { this->model_name_ = model_name; }
+  explicit NPUExecutor(const std::string &model_name, NPUManager *npu_manager = nullptr)
+      : model_name_(model_name), npu_manager_(npu_manager) {}
   ~NPUExecutor() override;
   int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
@@ -45,6 +49,7 @@ class NPUExecutor : public Executor {
 
  private:
   std::string model_name_;
+  NPUManager *npu_manager_ = nullptr;
   std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
   std::vector<std::shared_ptr<hiai::AiTensor>> npu_input_tensors_;
   std::vector<std::shared_ptr<hiai::AiTensor>> npu_output_tensors_;


mindspore/lite/src/runtime/agent/npu/npu_manager.cc (+22 -12)

@@ -57,25 +57,25 @@ bool NPUManager::CheckEMUIVersion() {
 }
 
 void NPUManager::Reset() {
-  for (auto client : clients_) {
-    client->UnLoadModel();
-    client.reset();
-  }
-  clients_.clear();
-
   index_ = 0;
   domi::HiaiIrBuild ir_build;
   for (const auto &model_map : models_) {
     auto model = model_map.second;
     if (!model->is_freed_) {
       ir_build.ReleaseModelBuff(*model->model_buffer_data_);
-      model->model_buffer_data_ = nullptr;
       model->is_freed_ = true;
-      model->desc_.reset();
-      model->desc_ = nullptr;
-      model->client_.reset();
     }
+    model->model_buffer_data_.reset();
+    model->desc_.reset();
+    model->client_.reset();
   }
   models_.clear();
+  for (auto client : clients_) {
+    client->UnLoadModel();
+    client.reset();
+  }
+  clients_.clear();
 }
 
 bool NPUManager::CheckDDKVersion() {
@@ -141,8 +141,9 @@ bool NPUManager::IsKirinChip() {
   return false;
 }
 
-int NPUManager::AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency) {
-  auto model = new SubGraphModel(index_, model_name, model_buffer_data);
+int NPUManager::AddModel(std::shared_ptr<domi::ModelBufferData> model_buffer_data, const std::string &model_name,
+                         int frequency) {
+  auto model = std::make_shared<SubGraphModel>(index_, model_name, model_buffer_data);
   auto desc = std::make_shared<hiai::AiModelDescription>(model_name, frequency, 0, 0, 0);
   model->desc_ = desc;
   models_.insert({model_name, model});
@@ -168,6 +169,7 @@ int NPUManager::LoadOMModel() {
   std::vector<std::shared_ptr<hiai::AiModelDescription>> models_desc;
   std::shared_ptr<hiai::AiModelMngerClient> client = nullptr;
   std::shared_ptr<hiai::AiModelBuilder> mc_builder = nullptr;
+  std::unordered_map<std::shared_ptr<hiai::AiModelBuilder>, hiai::MemBuffer *> builder_buffer_map;
   int total = 0;
   for (const auto &model_map : models_) {
     if (total % MAX_MODEL_NUM == 0) {
@@ -194,7 +196,8 @@ int NPUManager::LoadOMModel() {
       MS_LOG(ERROR) << "NPU input memory buffer create failed.";
       return RET_ERROR;
     }
-    model->desc_->SetModelBuffer(model->model_buffer_data_->data, model->model_buffer_data_->length);
+    builder_buffer_map.insert({mc_builder, buffer});
+    model->desc_->SetModelBuffer(buffer->GetMemBufferData(), buffer->GetMemBufferSize());
     if (models_desc.size() == MAX_MODEL_NUM) {
       auto ret = LoadModel(client, models_desc);
       if (ret != RET_ERROR) {
@@ -214,10 +217,17 @@ int NPUManager::LoadOMModel() {
     models_desc.clear();
   }
 
+  for (auto it : builder_buffer_map) {
+    it.first->MemBufferDestroy(it.second);
+  }
+  builder_buffer_map.clear();
   return RET_OK;
 }
 
 std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
+  if (models_.find(model_name) == models_.end() || models_[model_name] == nullptr) {
+    return nullptr;
+  }
   return models_[model_name]->client_;
 }
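
The ownership change above is the other half of the leak fix: model buffers move from raw domi::ModelBufferData* to std::shared_ptr, so Reset() only drops references instead of juggling manual deletes. A small self-contained sketch of the same pattern, with hypothetical stand-in types:

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

struct BufferStub {
  void *data = nullptr;
  unsigned int length = 0;
};

struct SubGraphModelStub {
  SubGraphModelStub(std::string name, std::shared_ptr<BufferStub> buffer)
      : name_(std::move(name)), model_buffer_data_(std::move(buffer)) {}
  std::string name_;
  std::shared_ptr<BufferStub> model_buffer_data_;
};

int main() {
  std::unordered_map<std::string, std::shared_ptr<SubGraphModelStub>> models;
  // AddModel: the buffer's lifetime is now tied to the map entry.
  auto buffer = std::make_shared<BufferStub>();
  models.emplace("net.om", std::make_shared<SubGraphModelStub>("net.om", buffer));
  buffer.reset();  // the map still keeps the buffer alive
  models.clear();  // Reset(): entries and buffers released, no manual delete
  return 0;
}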




mindspore/lite/src/runtime/agent/npu/npu_manager.h (+4 -7)

@@ -32,7 +32,7 @@ static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
   schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion};
 struct SubGraphModel {
  public:
-  SubGraphModel(int index, std::string model_name, domi::ModelBufferData *model_buffer_data)
+  SubGraphModel(int index, std::string model_name, std::shared_ptr<domi::ModelBufferData> model_buffer_data)
       : index_(index), model_name_(std::move(model_name)), model_buffer_data_(model_buffer_data) {}
 
   bool is_freed_ = false;
@@ -45,17 +45,14 @@ struct SubGraphModel {
 };
 class NPUManager {
  public:
-  static NPUManager *GetInstance() {
-    static NPUManager manager;
-    return &manager;
-  }
+  NPUManager() = default;
 
   ~NPUManager() { Reset(); }
 
   bool IsSupportNPU();
 
   // provide to subgraph to add model.
-  int AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency);
+  int AddModel(std::shared_ptr<domi::ModelBufferData> model_buffer_data, const std::string &model_name, int frequency);
 
   // scheduler to load om model.
   int LoadOMModel();
@@ -85,7 +82,7 @@ class NPUManager {
   int index_ = 0;
   bool is_check_version_ = false;
   bool is_support_ = false;
-  std::unordered_map<std::string, SubGraphModel *> models_;
+  std::unordered_map<std::string, std::shared_ptr<SubGraphModel>> models_;
   std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
 };




mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc (+16 -8)

@@ -36,12 +36,17 @@ using mindspore::lite::RET_OK;
 SubGraphNpuKernel::~SubGraphNpuKernel() {
   subgraph_input_op_.clear();
   subgraph_output_op_.clear();
+  out_tensor_sorted_.clear();
+  for (auto op : op_buffer_) {
+    delete op;
+  }
   if (executor_ != nullptr) {
     delete executor_;
   }
+  op_buffer_.clear();
 }
 
-domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
+std::shared_ptr<domi::ModelBufferData> SubGraphNpuKernel::BuildIRModel() {
   ge::Graph graph("NPUGraph");
 
   auto ret = BuildNPUInputOp();
@@ -58,20 +63,18 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
   ge::Model model(GetOMModelName(), mindspore::lite::Version());
   model.SetGraph(graph);
   domi::HiaiIrBuild ir_build;
-  auto om_model_buff = new (std::nothrow) domi::ModelBufferData;
+  auto om_model_buff = std::make_shared<domi::ModelBufferData>();
   if (om_model_buff == nullptr) {
     MS_LOG(ERROR) << "OM model buffer is nullptr.";
     return nullptr;
   }
   if (!ir_build.CreateModelBuff(model, *om_model_buff)) {
     MS_LOG(ERROR) << "Create model buffer failed.";
-    delete om_model_buff;
     return nullptr;
   }
   if (!ir_build.BuildIRModel(model, *om_model_buff)) {
     MS_LOG(ERROR) << "Build IR model failed.";
     ir_build.ReleaseModelBuff(*om_model_buff);
-    delete om_model_buff;
     return nullptr;
   }
   return om_model_buff;
@@ -85,6 +88,7 @@ int SubGraphNpuKernel::Run() {
 int SubGraphNpuKernel::BuildNPUInputOp() {
   int count = 0;
   subgraph_input_op_.clear();
+  op_buffer_.clear();
   for (auto node : this->nodes_) {
     std::vector<ge::Operator *> node_input_op;
     for (auto in_tensor : node->in_tensors()) {
@@ -94,6 +98,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
         data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
         subgraph_input_op_.push_back(*data);
         node_input_op.push_back(data);
+        op_buffer_.push_back(data);
         continue;
       }
@@ -130,6 +135,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
         auto weight_tensor = mindspore::lite::ConverterToNPUTensor(in_tensor);
         weight_const->set_attr_value(weight_tensor);
         node_input_op.push_back(weight_const);
+        op_buffer_.push_back(weight_const);
       }
     }
   }
@@ -140,6 +146,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
       return RET_ERROR;
     }
   }
+
   return RET_OK;
 }
 
@@ -176,17 +183,18 @@ std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }
 
 int SubGraphNpuKernel::Init() {
   if (!is_compiled_) {
-    name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
+    name_ = "kNpuSubGraph" + std::to_string(npu_manager_->index());
     auto model_buffer_data = BuildIRModel();
    if (model_buffer_data == nullptr) {
       MS_LOG(ERROR) << "Build IR model failed.";
       return RET_ERROR;
     }
 
-    mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data, GetOMModelName(),
-                                                         context_->GetNpuInfo().frequency_);
+    MS_ASSERT(npu_manager_ != nullptr);
+
+    npu_manager_->AddModel(model_buffer_data, GetOMModelName(), context_->GetNpuInfo().frequency_);
 
-    executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName());
+    executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName(), npu_manager_);
 
     if (executor_ == nullptr) {
       MS_LOG(ERROR) << "Create NPUExecutor failed.";


mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h (+11 -3)

@@ -18,10 +18,14 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
 #include <vector>
 #include <string>
+#include <memory>
 #include "include/hiai_ir_build.h"
 #include "src/sub_graph_kernel.h"
 #include "src/runtime/agent/npu/npu_executor.h"
 #include "include/graph/op/all_ops.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
@@ -31,8 +35,8 @@ class SubGraphNpuKernel : public SubGraphKernel {
   SubGraphNpuKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                     const std::vector<kernel::LiteKernel *> &inKernels,
                     const std::vector<kernel::LiteKernel *> &outKernels, const std::vector<kernel::LiteKernel *> &nodes,
-                    const lite::InnerContext *ctx = nullptr)
-      : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) {
+                    const lite::InnerContext *ctx = nullptr, lite::NPUManager *npu_manager = nullptr)
+      : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx), npu_manager_(npu_manager) {
     subgraph_type_ = kNpuSubGraph;
   }
 
@@ -56,7 +60,7 @@ class SubGraphNpuKernel : public SubGraphKernel {
   }
 
  private:
-  domi::ModelBufferData *BuildIRModel();
+  std::shared_ptr<domi::ModelBufferData> BuildIRModel();
 
   int BuildNPUInputOp();
 
@@ -71,11 +75,15 @@ class SubGraphNpuKernel : public SubGraphKernel {
  private:
   bool is_compiled_ = false;
 
+  lite::NPUManager *npu_manager_ = nullptr;
+
   std::vector<ge::Operator> subgraph_input_op_;
 
   std::vector<ge::Operator> subgraph_output_op_;
 
   std::vector<lite::Tensor *> out_tensor_sorted_;
+
+  std::vector<ge::Operator *> op_buffer_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_

mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.cc (+7 -3)

@@ -42,13 +42,13 @@ int FullconnectionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inp
   for (int i = 1; i < input_shape.size(); i++) {
     col *= input_shape[i];
   }
-  auto reshape_op = new (std::nothrow) hiai::op::Const(name_ + "_reshape_data");
+  reshape_op_ = new (std::nothrow) hiai::op::Const(name_ + "_reshape_data");
   vector<int> reshape_data = {input_shape[0], col};
   ge::TensorDesc reshape_tensor_desc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_FLOAT);
   ge::TensorPtr reshape_tensor = std::make_shared<hiai::Tensor>(reshape_tensor_desc);
   reshape_tensor->SetData(reinterpret_cast<uint8_t *>(reshape_data.data()), 2 * sizeof(float));
-  reshape_op->set_attr_value(reshape_tensor);
-  reshape_->set_input_shape(*reshape_op);
+  reshape_op_->set_attr_value(reshape_tensor);
+  reshape_->set_input_shape(*reshape_op_);
 
   fc_ = new (std::nothrow) hiai::op::MatMul(name_);
   if (fc_ == nullptr) {
@@ -117,6 +117,10 @@ FullconnectionNPUKernel::~FullconnectionNPUKernel() {
     delete biasadd_;
     biasadd_ = nullptr;
   }
+  if (reshape_op_ != nullptr) {
+    delete reshape_op_;
+    reshape_op_ = nullptr;
+  }
 }
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_FullConnection, NPUKernelCreator<FullconnectionNPUKernel>)
 }  // namespace mindspore::kernel

mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.h (+1 -0)

@@ -40,6 +40,7 @@ class FullconnectionNPUKernel : public ConvolutionBaseNPUKernel {
   hiai::op::Reshape *reshape_ = nullptr;
   hiai::op::MatMul *fc_ = nullptr;
   hiai::op::BiasAdd *biasadd_ = nullptr;
+  hiai::op::Const *reshape_op_ = nullptr;
   MatMulParameter *fc_param_;
 };
 }  // namespace mindspore::kernel

mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.cc (+21 -13)

@@ -39,11 +39,6 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
   }
   op_->set_input_x(*npu_inputs[0]);
 
-  auto gamma = new (std::nothrow) hiai::op::Const(name_ + "_gamma");
-  if (gamma == nullptr) {
-    MS_LOG(ERROR) << "New gamma const failed.";
-    return RET_ERROR;
-  }
   auto gamma_shape = inputs[1]->shape();
   std::shared_ptr<ge::Tensor> gamma_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
   if (gamma_tensor == nullptr) {
@@ -54,14 +49,14 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
                                    lite::ConverterToNPUDataType(inputs[1]->data_type()));
   gamma_tensor->SetTensorDesc(gamma_tensor_desc);
   gamma_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[1]->data_c()), inputs[1]->Size());
-  gamma->set_attr_value(gamma_tensor);
-  op_->set_input_gamma(*gamma);
-
-  auto beta = new (std::nothrow) hiai::op::Const(name_ + "_beta");
-  if (beta == nullptr) {
-    MS_LOG(ERROR) << "New beta const failed.";
+  gamma_ = new (std::nothrow) hiai::op::Const(name_ + "_gamma");
+  if (gamma_ == nullptr) {
+    MS_LOG(ERROR) << "New gamma_ const failed.";
     return RET_ERROR;
   }
+  gamma_->set_attr_value(gamma_tensor);
+  op_->set_input_gamma(*gamma_);
+
   auto beta_shape = inputs[2]->shape();
   std::shared_ptr<ge::Tensor> beta_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
   if (beta_tensor == nullptr) {
@@ -72,8 +67,13 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
                                   lite::ConverterToNPUDataType(inputs[2]->data_type()));
   beta_tensor->SetTensorDesc(beta_tensor_desc);
   beta_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[2]->data_c()), inputs[2]->Size());
-  beta->set_attr_value(beta_tensor);
-  op_->set_input_beta(*beta);
+  beta_ = new (std::nothrow) hiai::op::Const(name_ + "_beta");
+  if (beta_ == nullptr) {
+    MS_LOG(ERROR) << "New beta_ const failed.";
+    return RET_ERROR;
+  }
+  beta_->set_attr_value(beta_tensor);
+  op_->set_input_beta(*beta_);
   op_->set_attr_epsilon(instance_norm_param_->epsilon_);
   return RET_OK;
 }
@@ -85,6 +85,14 @@ InstanceNormNPUKernel::~InstanceNormNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (gamma_ != nullptr) {
+    delete gamma_;
+    gamma_ = nullptr;
+  }
+  if (beta_ != nullptr) {
+    delete beta_;
+    beta_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, NPUKernelCreator<InstanceNormNPUKernel>)


mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.h (+2 -0)

@@ -38,6 +38,8 @@ class InstanceNormNPUKernel : public NPUKernel {
 
  private:
   hiai::op::InstanceNorm *op_ = nullptr;
+  hiai::op::Const *gamma_ = nullptr;
+  hiai::op::Const *beta_ = nullptr;
   InstanceNormParameter *instance_norm_param_;
 };
 }  // namespace mindspore::kernel


mindspore/lite/src/runtime/kernel/npu/npu_kernel.h (+2 -0)

@@ -54,10 +54,12 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
   if (!op_parameter->infer_flag_) {
     MS_LOG(ERROR) << "NPU does not support runtime inference shape. Type is:"
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
+    free(op_parameter);
     return nullptr;
   }
   if (inputs[0]->shape().size() > 4) {
     MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
+    free(op_parameter);
    return nullptr;
   }
   auto *kernel = new (std::nothrow) T(op_parameter, inputs, outputs, ctx);
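
The npu_kernel.h change plugs a different leak: OpParameter is malloc-allocated by the caller, so every failure path inside the creator template has to free it before returning nullptr. A simplified, self-contained sketch of that pattern (the struct and checks are hypothetical stand-ins for the real template):

#include <cstdlib>

struct OpParameterStub {
  bool infer_flag_ = false;
};

OpParameterStub *CreateKernelParamSketch(OpParameterStub *op_parameter, int input_dims) {
  if (!op_parameter->infer_flag_) {
    free(op_parameter);  // added by this commit: avoid leaking the parameter
    return nullptr;
  }
  if (input_dims > 4) {
    free(op_parameter);  // same fix on the dimension check
    return nullptr;
  }
  return op_parameter;  // on success, ownership passes to the kernel
}

int main() {
  auto *param = static_cast<OpParameterStub *>(malloc(sizeof(OpParameterStub)));
  if (param == nullptr) return 1;
  param->infer_flag_ = false;
  return CreateKernelParamSketch(param, 2) == nullptr ? 0 : 1;
}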


mindspore/lite/src/runtime/kernel/npu/pad_npu.cc (+14 -6)

@@ -51,19 +51,19 @@ int PadNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
   ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr padding_tensor = std::make_shared<hiai::Tensor>(padding_tensor_desc);
   padding_tensor->SetData(reinterpret_cast<uint8_t *>(paddings_.data()), 2 * size * sizeof(int));
-  auto paddings = new hiai::op::Const(name_ + "paddings");
-  paddings->set_attr_value(padding_tensor);
+  hiai_paddings_ = new hiai::op::Const(name_ + "paddings");
+  hiai_paddings_->set_attr_value(padding_tensor);
 
   ge::TensorDesc constant_values_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
   ge::TensorPtr constant_values_tensor = std::make_shared<hiai::Tensor>(constant_values_tensor_desc);
   vector<float> constant_values_data_value = {param_->constant_value_};
   constant_values_tensor->SetData(reinterpret_cast<uint8_t *>(constant_values_data_value.data()), 1 * sizeof(float));
-  auto constant = new hiai::op::Const(name_ + "constant");
-  constant->set_attr_value(constant_values_tensor);
+  hiai_constant_ = new hiai::op::Const(name_ + "constant");
+  hiai_constant_->set_attr_value(constant_values_tensor);
 
   op_->set_input_x(*npu_inputs[0]);
-  op_->set_input_constant_values(*constant);
-  op_->set_input_paddings(*paddings);
+  op_->set_input_constant_values(*hiai_constant_);
+  op_->set_input_paddings(*hiai_paddings_);
 
   return RET_OK;
 }
@@ -75,6 +75,14 @@ PadNPUKernel::~PadNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (hiai_paddings_ != nullptr) {
+    delete hiai_paddings_;
+    hiai_paddings_ = nullptr;
+  }
+  if (hiai_constant_ != nullptr) {
+    delete hiai_constant_;
+    hiai_constant_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_PadFusion, NPUKernelCreator<PadNPUKernel>)


mindspore/lite/src/runtime/kernel/npu/pad_npu.h (+2 -0)

@@ -40,6 +40,8 @@ class PadNPUKernel : public NPUKernel {
   hiai::op::PadV2 *op_ = nullptr;
   PadParameter *param_;
   std::vector<int> paddings_;
+  hiai::op::Const *hiai_paddings_ = nullptr;
+  hiai::op::Const *hiai_constant_ = nullptr;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_PAD_NPU_H_

mindspore/lite/src/runtime/kernel/npu/resize_npu.cc (+8 -4)

@@ -46,8 +46,8 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
   vector<int32_t> dataValue = {static_cast<int32_t>(resize_parameter_->new_height_),
                                static_cast<int32_t>(resize_parameter_->new_width_)};
   sizeTensor->SetData(reinterpret_cast<uint8_t *>(dataValue.data()), 2 * sizeof(int32_t));
-  auto out_size = new (std::nothrow) hiai::op::Const(name_ + "_size");
-  out_size->set_attr_value(sizeTensor);
+  out_size_ = new (std::nothrow) hiai::op::Const(name_ + "_size");
+  out_size_->set_attr_value(sizeTensor);
   if (resize_parameter_->method_ == schema::ResizeMethod_LINEAR) {
     auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_);
     if (op == nullptr) {
@@ -57,7 +57,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
     op->set_attr_align_corners(resize_parameter_->coordinate_transform_mode_ ==
                                schema::CoordinateTransformMode_ALIGN_CORNERS);
     op->set_input_x(*npu_inputs[0]);
-    op->set_input_size(*out_size);
+    op->set_input_size(*out_size_);
     op->set_attr_half_pixel_centers(resize_parameter_->preserve_aspect_ratio_);
     op_ = op;
   } else if (resize_parameter_->method_ == schema::ResizeMethod_NEAREST) {
@@ -69,7 +69,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
     op->set_attr_align_corners(resize_parameter_->coordinate_transform_mode_ ==
                                schema::CoordinateTransformMode_ALIGN_CORNERS);
     op->set_input_x(*npu_inputs[0]);
-    op->set_input_size(*out_size);
+    op->set_input_size(*out_size_);
     op_ = op;
   } else {
     MS_LOG(WARNING) << "Unsupported resize method type:" << resize_parameter_->method_;
@@ -85,6 +85,10 @@ ResizeNPUKernel::~ResizeNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (out_size_ != nullptr) {
+    delete out_size_;
+    out_size_ = nullptr;
+  }
 }
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Resize, NPUKernelCreator<ResizeNPUKernel>)
 }  // namespace mindspore::kernel

mindspore/lite/src/runtime/kernel/npu/resize_npu.h (+1 -0)

@@ -40,6 +40,7 @@ class ResizeNPUKernel : public NPUKernel {
 
  private:
   ge::Operator *op_ = nullptr;
+  hiai::op::Const *out_size_ = nullptr;
   ResizeParameter *resize_parameter_;
 };
 }  // namespace mindspore::kernel


mindspore/lite/src/runtime/kernel/npu/split_npu.cc (+14 -6)

@@ -39,20 +39,20 @@ int SplitNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
   ge::TensorDesc size_splits_tensor_desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr size_splits_tensor = std::make_shared<hiai::Tensor>(size_splits_tensor_desc);
   size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(param_->split_sizes_), size * sizeof(int));
-  auto size_splits = new hiai::op::Const(name_ + "_size");
-  size_splits->set_attr_value(size_splits_tensor);
+  size_splits_ = new hiai::op::Const(name_ + "_size");
+  size_splits_->set_attr_value(size_splits_tensor);
 
   ge::TensorDesc split_dim_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr split_dim_tensor = std::make_shared<hiai::Tensor>(split_dim_tensor_desc);
   vector<int32_t> split_dim_data_value = {param_->split_dim_};
   split_dim_tensor->SetData(reinterpret_cast<uint8_t *>(split_dim_data_value.data()), 1 * sizeof(int));
-  auto split_dim = new hiai::op::Const(name_ + "_dim");
-  split_dim->set_attr_value(split_dim_tensor);
+  split_dim_ = new hiai::op::Const(name_ + "_dim");
+  split_dim_->set_attr_value(split_dim_tensor);
 
   op_->set_input_x(*npu_inputs[0]);
   op_->set_attr_num_split(param_->num_split_);
-  op_->set_input_split_dim(*split_dim);
-  op_->set_input_size_splits(*size_splits);
+  op_->set_input_split_dim(*split_dim_);
+  op_->set_input_size_splits(*size_splits_);
   op_->create_dynamic_output_y(param_->num_split_);
   return RET_OK;
 }
@@ -64,6 +64,14 @@ SplitNPUKernel::~SplitNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (size_splits_ != nullptr) {
+    delete size_splits_;
+    size_splits_ = nullptr;
+  }
+  if (split_dim_ != nullptr) {
+    delete split_dim_;
+    split_dim_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Split, NPUKernelCreator<SplitNPUKernel>)


mindspore/lite/src/runtime/kernel/npu/split_npu.h (+2 -0)

@@ -39,6 +39,8 @@ class SplitNPUKernel : public NPUKernel {
  private:
   hiai::op::SplitV *op_ = nullptr;
   SplitParameter *param_;
+  hiai::op::Const *size_splits_ = nullptr;
+  hiai::op::Const *split_dim_ = nullptr;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SPLIT_NPU_H_

mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.cc (+7 -3)

@@ -43,11 +43,11 @@ int UnsqueezeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
   ge::TensorDesc desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr tensor = std::make_shared<hiai::Tensor>(desc);
   tensor->SetData(reinterpret_cast<uint8_t *>(axis_.data()), size * sizeof(int));
-  auto axis = new hiai::op::Const(name_ + "_axis");
-  axis->set_attr_value(tensor);
+  axis_const_ = new hiai::op::Const(name_ + "_axis");
+  axis_const_->set_attr_value(tensor);
 
   op_->set_input_x(*npu_inputs[0]);
-  op_->set_input_axis(*axis);
+  op_->set_input_axis(*axis_const_);
 
   return RET_OK;
 }
@@ -59,6 +59,10 @@ UnsqueezeNPUKernel::~UnsqueezeNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (axis_const_ != nullptr) {
+    delete axis_const_;
+    axis_const_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Unsqueeze, NPUKernelCreator<UnsqueezeNPUKernel>)


mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.h (+1 -0)

@@ -39,6 +39,7 @@ class UnsqueezeNPUKernel : public NPUKernel {
 
  private:
   hiai::op::ExpandDims *op_ = nullptr;
+  hiai::op::Const *axis_const_ = nullptr;
   vector<int> axis_;
 };
 }  // namespace mindspore::kernel


mindspore/lite/src/scheduler.cc (+7 -6)

@@ -539,8 +539,8 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
   }
   if (type == kernel::kNpuSubGraph) {
 #if SUPPORT_NPU
-    auto sub_kernel = new (std::nothrow)
-      kernel::SubGraphNpuKernel(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_);
+    auto sub_kernel = new (std::nothrow) kernel::SubGraphNpuKernel(input_tensors, output_tensors, input_kernels,
+                                                                   output_kernels, kernels, context_, npu_manager_);
     if (sub_kernel == nullptr) {
       MS_LOG(ERROR) << "NPU subgraph new failed.";
       return nullptr;
@@ -653,13 +653,14 @@ int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
     return RET_OK;
   }
   auto transform_pass = new NPUTransformPass(context_, dst_kernels, src_tensors_);
-  mindspore::lite::NPUPassManager::GetInstance()->AddPass(transform_pass);
+  MS_ASSERT(npu_pass_manager_ != nullptr);
+  npu_pass_manager_->AddPass(transform_pass);
   auto concat_format_pass = new NPUInsertTransformPass(context_, dst_kernels, src_tensors_);
-  mindspore::lite::NPUPassManager::GetInstance()->AddPass(concat_format_pass);
+  npu_pass_manager_->AddPass(concat_format_pass);
   auto fusion_pass = new NPUFusionPass(dst_kernels);
-  mindspore::lite::NPUPassManager::GetInstance()->AddPass(fusion_pass);
+  npu_pass_manager_->AddPass(fusion_pass);
 
-  ret = mindspore::lite::NPUPassManager::GetInstance()->Run();
+  ret = npu_pass_manager_->Run();
 #endif
   return ret;
 }


mindspore/lite/src/scheduler.h (+16 -0)

@@ -23,12 +23,24 @@
 #include "src/sub_graph_kernel.h"
 #include "src/inner_context.h"
 #include "include/model.h"
+#if SUPPORT_NPU
+#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
+#endif
 
 namespace mindspore::lite {
 class Scheduler {
  public:
   Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors)
       : context_(ctx), src_model_(src_model), src_tensors_(src_tensors) {}
+#if SUPPORT_NPU
+  Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors,
+            NPUManager *npu_manager = nullptr, NPUPassManager *npu_pass_manager = nullptr)
+      : context_(ctx),
+        src_model_(src_model),
+        src_tensors_(src_tensors),
+        npu_manager_(npu_manager),
+        npu_pass_manager_(npu_pass_manager) {}
+#endif
   ~Scheduler() = default;
 
   int Schedule(std::vector<kernel::LiteKernel *> *dst_kernels);
@@ -84,6 +96,10 @@ class Scheduler {
   const InnerContext *context_ = nullptr;
   Model *src_model_ = nullptr;
   std::vector<Tensor *> *src_tensors_;
+#if SUPPORT_NPU
+  NPUManager *npu_manager_ = nullptr;
+  NPUPassManager *npu_pass_manager_ = nullptr;
+#endif
   std::vector<size_t> graph_output_node_indexes_;
   std::map<int, OpParameter *> op_parameters_;
 };
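
The scheduler overload above is plain constructor injection: the managers are borrowed from the session, never owned. A tiny self-contained sketch of that shape, with ManagerStub/PassManagerStub as hypothetical stand-ins for NPUManager and NPUPassManager:

struct ManagerStub {};
struct PassManagerStub {};

class SchedulerSketch {
 public:
  explicit SchedulerSketch(ManagerStub *npu_manager = nullptr, PassManagerStub *npu_pass_manager = nullptr)
      : npu_manager_(npu_manager), npu_pass_manager_(npu_pass_manager) {}

  bool HasNpu() const { return npu_manager_ != nullptr; }

 private:
  ManagerStub *npu_manager_ = nullptr;  // borrowed from the session
  PassManagerStub *npu_pass_manager_ = nullptr;
};

int main() {
  ManagerStub manager;
  PassManagerStub pass_manager;
  SchedulerSketch scheduler(&manager, &pass_manager);
  return scheduler.HasNpu() ? 0 : 1;
}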

