@@ -28,6 +28,10 @@ set(API_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/tensor_impl.cc
         )
+if(SUPPORT_NPU)
+    include_directories(${DDK_PATH})
+endif()
 set(LITE_SRC
     ${API_SRC}
     ${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc
@@ -156,7 +160,6 @@ else()
 endif()
 if(SUPPORT_NPU)
     add_subdirectory(runtime/agent/npu)
-    include_directories(${DDK_PATH})
     target_link_libraries(mindspore-lite npu_kernel_mid)
     target_link_libraries(mindspore-lite_static npu_kernel_mid)
 endif()
@@ -31,6 +31,18 @@ InnerContext::InnerContext(const Context *context) {
   }
 }
 
+#if SUPPORT_NPU
+InnerContext::InnerContext(const Context *context, NPUManager *npu_manager) {
+  this->allocator = context->allocator;
+  this->thread_num_ = context->thread_num_;
+  this->device_list_.clear();
+  for (auto &device_ctx : context->device_list_) {
+    this->device_list_.push_back(device_ctx);
+  }
+  this->npu_manager_ = npu_manager;
+}
+#endif
+
 int InnerContext::Init() {
   if (RET_OK != this->IsValid()) {
     MS_LOG(ERROR) << "Context is not valid";
@@ -120,10 +132,11 @@ bool InnerContext::IsGpuEnabled() const {
 bool InnerContext::IsNpuEnabled() const {
 #ifdef SUPPORT_NPU
+  MS_ASSERT(npu_manager_ != nullptr);
   return this->device_list_.end() !=
            std::find_if(this->device_list_.begin(), this->device_list_.end(),
                         [](const DeviceContext &device) { return device.device_type_ == DT_NPU; }) &&
-         mindspore::lite::NPUManager::GetInstance()->IsSupportNPU();
+         npu_manager_->IsSupportNPU();
 #else
   return false;
 #endif
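Note: the enablement check now has two legs, and both go through per-session state: the user must have put an NPU entry in the device list, and the injected manager's capability probe must pass. A minimal self-contained sketch of the same shape, with stand-in types rather than the MindSpore API:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    enum DeviceType { DT_CPU_TOY, DT_GPU_TOY, DT_NPU_TOY };

    struct ManagerStub {
      bool IsSupportNPU() const { return supported_; }  // stand-in for the DDK probe
      bool supported_ = false;
    };

    // NPU is "enabled" only if requested AND actually supported on this device.
    bool IsNpuEnabled(const std::vector<DeviceType> &devices, const ManagerStub *manager) {
      assert(manager != nullptr);  // mirrors MS_ASSERT(npu_manager_ != nullptr)
      const bool requested =
          std::find(devices.begin(), devices.end(), DT_NPU_TOY) != devices.end();
      return requested && manager->IsSupportNPU();
    }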
@@ -20,6 +20,9 @@
 #include "include/context.h"
 #include "src/runtime/runtime_api.h"
 #include "src/runtime/allocator.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore::lite {
 struct InnerContext : public Context {
@@ -30,7 +33,9 @@ struct InnerContext : public Context {
   InnerContext() = default;
 
   explicit InnerContext(const Context *context);
+#if SUPPORT_NPU
+  InnerContext(const Context *context, NPUManager *npu_manager);
+#endif
 
   int Init();
 
   bool IsCpuFloat16Enabled() const;
@@ -52,6 +57,12 @@ struct InnerContext : public Context {
   int IsValid() const;
 
   virtual ~InnerContext();
+
+#if SUPPORT_NPU
+ private:
+  NPUManager *npu_manager_ = nullptr;
+#endif
 };
 }  // namespace mindspore::lite
@@ -395,7 +395,11 @@ int LiteSession::CompileGraph(Model *model) {
     return ret;
   }
   // scheduler kernels
+#if SUPPORT_NPU
+  Scheduler scheduler(context_, model, &tensors_, npu_manager_, npu_pass_manager_);
+#else
   Scheduler scheduler(context_, model, &tensors_);
+#endif
   ret = scheduler.Schedule(&kernels_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
@@ -404,7 +408,8 @@ int LiteSession::CompileGraph(Model *model) {
   }
 #if SUPPORT_NPU
   if (this->context_->IsNpuEnabled()) {
-    if (mindspore::lite::NPUManager::GetInstance()->LoadOMModel() != RET_OK) {
+    MS_ASSERT(npu_manager_ != nullptr);
+    if (npu_manager_->LoadOMModel() != RET_OK) {
       MS_LOG(ERROR) << "NPU client load model failed.";
       return RET_ERROR;
     }
@@ -501,14 +506,30 @@ int LiteSession::Init(const Context *context) {
     MS_LOG(ERROR) << "Not support multi-threading";
     return RET_ERROR;
   }
+#if SUPPORT_NPU
+  npu_manager_ = new (std::nothrow) NPUManager();
+  if (npu_manager_ == nullptr) {
+    MS_LOG(ERROR) << "New npu_manager_ failed";
+    is_running_.store(false);
+    return RET_ERROR;
+  }
+  npu_pass_manager_ = new (std::nothrow) NPUPassManager();
+  if (npu_pass_manager_ == nullptr) {
+    MS_LOG(ERROR) << "New npu_pass_manager_ failed";
+    is_running_.store(false);
+    return RET_ERROR;
+  }
+#endif
   if (context == nullptr) {
     MS_LOG(ERROR) << "context is nullptr";
     is_running_.store(false);
     return RET_NULL_PTR;
   }
+#if SUPPORT_NPU
+  this->context_ = new (std::nothrow) InnerContext(context, npu_manager_);
+#else
   this->context_ = new (std::nothrow) InnerContext(context);
+#endif
   if (this->context_ == nullptr) {
     MS_LOG(ERROR) << "New Context failed";
     is_running_.store(false);
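The ordering in Init() matters: the session allocates its two managers before constructing the context, because the new InnerContext constructor takes the manager at construction time. Every failure path still flips is_running_ back. A condensed sketch of that allocate-then-inject sequence, assuming stand-in types:

    #include <atomic>
    #include <new>

    struct Manager {};
    struct Ctx {
      explicit Ctx(Manager *m) : manager_(m) {}
      Manager *manager_;
    };

    struct Session {
      int Init() {
        manager_ = new (std::nothrow) Manager();  // allocated first
        if (manager_ == nullptr) {
          is_running_.store(false);  // every early exit releases the guard
          return -1;
        }
        context_ = new (std::nothrow) Ctx(manager_);  // injection happens here
        if (context_ == nullptr) {
          is_running_.store(false);
          return -1;
        }
        return 0;
      }
      Manager *manager_ = nullptr;
      Ctx *context_ = nullptr;
      std::atomic<bool> is_running_ = true;
    };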
@@ -579,8 +600,12 @@ LiteSession::~LiteSession() {
   delete this->executor_;
   this->executor_ = nullptr;
 #if SUPPORT_NPU
-  mindspore::lite::NPUPassManager::GetInstance()->Clear();
-  mindspore::lite::NPUManager::GetInstance()->Reset();
+  MS_ASSERT(npu_manager_ != nullptr);
+  MS_ASSERT(npu_pass_manager_ != nullptr);
+  npu_pass_manager_->Clear();
+  delete npu_pass_manager_;
+  npu_manager_->Reset();
+  delete npu_manager_;
 #endif
 #if GPU_OPENCL && !SUPPORT_TRAIN
   delete opencl_runtime_wrapper_;
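Teardown now happens with the session: passes are cleared before their manager is deleted, and OM models are reset before the NPU manager is deleted. One detail worth noting: the npu_manager.h hunk further down shows `~NPUManager() { Reset(); }`, so the explicit Reset() before delete runs the cleanup twice; that is harmless because Reset() clears its containers, making the second pass a no-op. A possible follow-up sketch with unique_ptr and stand-in types, where reverse declaration order gives the same sequencing automatically:

    #include <memory>

    struct NPUPassManagerT { void Clear() {} };
    struct NPUManagerT {
      void Reset() {}
      ~NPUManagerT() { Reset(); }
    };

    struct SessionT {
      ~SessionT() {
        if (pass_manager_) pass_manager_->Clear();  // drop passes before managers go away
        // members declared later are destroyed first: pass_manager_, then manager_
      }
      std::unique_ptr<NPUManagerT> manager_ = std::make_unique<NPUManagerT>();
      std::unique_ptr<NPUPassManagerT> pass_manager_ = std::make_unique<NPUPassManagerT>();
    };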
@@ -31,6 +31,10 @@
 #include "src/executor.h"
 #include "src/tensor.h"
 #include "src/tensorlist.h"
+#if SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
+#endif
 #if GPU_OPENCL
 #include "src/runtime/gpu/opencl/opencl_runtime.h"
 #elif GPU_VULKAN
@@ -129,6 +133,10 @@ class LiteSession : public session::LiteSession {
   Executor *executor_ = nullptr;
   Model *model_ = nullptr;
   std::atomic<bool> is_running_ = false;
+#if SUPPORT_NPU
+  NPUManager *npu_manager_ = nullptr;
+  NPUPassManager *npu_pass_manager_ = nullptr;
+#endif
 #if GPU_OPENCL && !SUPPORT_TRAIN
   opencl::OpenCLRuntimeWrapper *opencl_runtime_wrapper_{nullptr};
 #elif GPU_VULKAN && !SUPPORT_TRAIN
@@ -21,12 +21,19 @@
 namespace mindspore::lite {
 NPUExecutor::~NPUExecutor() {
   client_.reset();
+  for (auto t : npu_input_tensors_) {
+    t.reset();
+  }
   npu_input_tensors_.clear();
+  for (auto t : npu_output_tensors_) {
+    t.reset();
+  }
   npu_output_tensors_.clear();
 }
 
 int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
-  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
+  MS_ASSERT(npu_manager_ != nullptr);
+  this->client_ = npu_manager_->GetClient(model_name_);
   if (this->client_ == nullptr) {
     MS_LOG(ERROR) << "client is nullptr.";
     return RET_ERROR;
@@ -23,11 +23,15 @@
 #include "src/executor.h"
 #include "include/errorcode.h"
 #include "include/HiAiModelManagerService.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore::lite {
 class NPUExecutor : public Executor {
  public:
-  explicit NPUExecutor(const std::string &model_name) { this->model_name_ = model_name; }
+  explicit NPUExecutor(const std::string &model_name, NPUManager *npu_manager = nullptr)
+      : model_name_(model_name), npu_manager_(npu_manager) {}
   ~NPUExecutor() override;
 
   int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
@@ -45,6 +49,7 @@ class NPUExecutor : public Executor {
  private:
   std::string model_name_;
+  NPUManager *npu_manager_ = nullptr;
   std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
   std::vector<std::shared_ptr<hiai::AiTensor>> npu_input_tensors_;
   std::vector<std::shared_ptr<hiai::AiTensor>> npu_output_tensors_;
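The executor uses the same constructor-injection idiom with a defaulted parameter, which keeps any legacy call site compiling while the new call site (SubGraphNpuKernel::Init, below) passes the per-session manager. A self-contained sketch of the pattern, with stand-in types:

    #include <string>

    struct ManagerT {};

    class ExecutorT {
     public:
      explicit ExecutorT(const std::string &model_name, ManagerT *manager = nullptr)
          : model_name_(model_name), manager_(manager) {}

     private:
      std::string model_name_;
      ManagerT *manager_ = nullptr;
    };

    int main() {
      ManagerT m;
      ExecutorT legacy("old.om");        // old-style call site, manager stays null
      ExecutorT injected("new.om", &m);  // new call site passes the session's manager
      return 0;
    }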
@@ -57,25 +57,25 @@ bool NPUManager::CheckEMUIVersion() {
 }
 
 void NPUManager::Reset() {
+  for (auto client : clients_) {
+    client->UnLoadModel();
+    client.reset();
+  }
+  clients_.clear();
   index_ = 0;
   domi::HiaiIrBuild ir_build;
   for (const auto &model_map : models_) {
     auto model = model_map.second;
     if (!model->is_freed_) {
       ir_build.ReleaseModelBuff(*model->model_buffer_data_);
-      model->model_buffer_data_ = nullptr;
       model->is_freed_ = true;
-      model->desc_.reset();
-      model->desc_ = nullptr;
-      model->client_.reset();
     }
+    model->model_buffer_data_.reset();
+    model->desc_.reset();
+    model->client_.reset();
   }
   models_.clear();
-  for (auto client : clients_) {
-    client->UnLoadModel();
-    client.reset();
-  }
-  clients_.clear();
 }
 
 bool NPUManager::CheckDDKVersion() {
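Two things changed in Reset(): clients now unload their models before the model records are torn down, and because model_buffer_data_, desc_, and client_ are shared_ptrs after this patch, the per-model cleanup moves outside the is_freed_ branch as unconditional reset() calls. That is safe because resetting an empty shared_ptr is a no-op, as this minimal demonstration shows:

    #include <cstdio>
    #include <memory>

    int main() {
      std::shared_ptr<int> p = std::make_shared<int>(42);
      p.reset();  // releases the object
      p.reset();  // harmless no-op on an already-empty pointer
      std::printf("use_count = %ld\n", p.use_count());  // prints 0
      return 0;
    }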
@@ -141,8 +141,9 @@ bool NPUManager::IsKirinChip() {
   return false;
 }
 
-int NPUManager::AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency) {
-  auto model = new SubGraphModel(index_, model_name, model_buffer_data);
+int NPUManager::AddModel(std::shared_ptr<domi::ModelBufferData> model_buffer_data, const std::string &model_name,
+                         int frequency) {
+  auto model = std::make_shared<SubGraphModel>(index_, model_name, model_buffer_data);
   auto desc = std::make_shared<hiai::AiModelDescription>(model_name, frequency, 0, 0, 0);
   model->desc_ = desc;
   models_.insert({model_name, model});
@@ -168,6 +169,7 @@ int NPUManager::LoadOMModel() {
   std::vector<std::shared_ptr<hiai::AiModelDescription>> models_desc;
   std::shared_ptr<hiai::AiModelMngerClient> client = nullptr;
   std::shared_ptr<hiai::AiModelBuilder> mc_builder = nullptr;
+  std::unordered_map<std::shared_ptr<hiai::AiModelBuilder>, hiai::MemBuffer *> builder_buffer_map;
   int total = 0;
   for (const auto &model_map : models_) {
     if (total % MAX_MODEL_NUM == 0) {
@@ -194,7 +196,8 @@ int NPUManager::LoadOMModel() {
       MS_LOG(ERROR) << "NPU input memory buffer create failed.";
       return RET_ERROR;
     }
-    model->desc_->SetModelBuffer(model->model_buffer_data_->data, model->model_buffer_data_->length);
+    builder_buffer_map.insert({mc_builder, buffer});
+    model->desc_->SetModelBuffer(buffer->GetMemBufferData(), buffer->GetMemBufferSize());
     if (models_desc.size() == MAX_MODEL_NUM) {
       auto ret = LoadModel(client, models_desc);
       if (ret != RET_ERROR) {
@@ -214,10 +217,17 @@ int NPUManager::LoadOMModel() {
     models_desc.clear();
   }
 
+  for (auto it : builder_buffer_map) {
+    it.first->MemBufferDestroy(it.second);
+  }
+  builder_buffer_map.clear();
+
   return RET_OK;
 }
 
 std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
+  if (models_.find(model_name) == models_.end() || models_[model_name] == nullptr) {
+    return nullptr;
+  }
   return models_[model_name]->client_;
 }
@@ -32,7 +32,7 @@ static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
   schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion};
 
 struct SubGraphModel {
  public:
-  SubGraphModel(int index, std::string model_name, domi::ModelBufferData *model_buffer_data)
+  SubGraphModel(int index, std::string model_name, std::shared_ptr<domi::ModelBufferData> model_buffer_data)
       : index_(index), model_name_(std::move(model_name)), model_buffer_data_(model_buffer_data) {}
   bool is_freed_ = false;
@@ -45,17 +45,14 @@ struct SubGraphModel {
 };
 
 class NPUManager {
  public:
-  static NPUManager *GetInstance() {
-    static NPUManager manager;
-    return &manager;
-  }
+  NPUManager() = default;
   ~NPUManager() { Reset(); }
 
   bool IsSupportNPU();
 
   // provide to subgraph to add model.
-  int AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency);
+  int AddModel(std::shared_ptr<domi::ModelBufferData> model_buffer_data, const std::string &model_name, int frequency);
 
   // scheduler to load om model.
   int LoadOMModel();
@@ -85,7 +82,7 @@ class NPUManager {
   int index_ = 0;
   bool is_check_version_ = false;
   bool is_support_ = false;
-  std::unordered_map<std::string, SubGraphModel *> models_;
+  std::unordered_map<std::string, std::shared_ptr<SubGraphModel>> models_;
   std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
 };
@@ -36,12 +36,17 @@ using mindspore::lite::RET_OK;
 SubGraphNpuKernel::~SubGraphNpuKernel() {
   subgraph_input_op_.clear();
   subgraph_output_op_.clear();
+  out_tensor_sorted_.clear();
+  for (auto op : op_buffer_) {
+    delete op;
+  }
   if (executor_ != nullptr) {
     delete executor_;
   }
+  op_buffer_.clear();
 }
 
-domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
+std::shared_ptr<domi::ModelBufferData> SubGraphNpuKernel::BuildIRModel() {
   ge::Graph graph("NPUGraph");
 
   auto ret = BuildNPUInputOp();
@@ -58,20 +63,18 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
   ge::Model model(GetOMModelName(), mindspore::lite::Version());
   model.SetGraph(graph);
   domi::HiaiIrBuild ir_build;
-  auto om_model_buff = new (std::nothrow) domi::ModelBufferData;
+  auto om_model_buff = std::make_shared<domi::ModelBufferData>();
   if (om_model_buff == nullptr) {
     MS_LOG(ERROR) << "OM model buffer is nullptr.";
     return nullptr;
   }
   if (!ir_build.CreateModelBuff(model, *om_model_buff)) {
     MS_LOG(ERROR) << "Create model buffer failed.";
-    delete om_model_buff;
     return nullptr;
   }
   if (!ir_build.BuildIRModel(model, *om_model_buff)) {
     MS_LOG(ERROR) << "Build IR model failed.";
     ir_build.ReleaseModelBuff(*om_model_buff);
-    delete om_model_buff;
     return nullptr;
   }
   return om_model_buff;
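Returning a shared_ptr lets both error paths drop their paired deletes: the buffer frees itself when the local goes out of scope. One side note: std::make_shared reports allocation failure by throwing std::bad_alloc rather than returning null, so the surviving nullptr check is effectively dead code, presumably kept for uniformity with the nothrow style used elsewhere. A sketch of the ownership change with a stand-in type:

    #include <cstdio>
    #include <memory>

    struct BufferT { ~BufferT() { std::puts("buffer released"); } };

    std::shared_ptr<BufferT> Build(bool fail_midway) {
      auto buf = std::make_shared<BufferT>();  // throws on failure, never returns null
      if (fail_midway) {
        return nullptr;  // buf goes out of scope and frees itself: no delete needed
      }
      return buf;
    }

    int main() {
      Build(true);               // prints "buffer released" on the error path
      auto keep = Build(false);  // caller now shares ownership
      return 0;
    }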
@@ -85,6 +88,7 @@ int SubGraphNpuKernel::Run() {
 int SubGraphNpuKernel::BuildNPUInputOp() {
   int count = 0;
   subgraph_input_op_.clear();
+  op_buffer_.clear();
   for (auto node : this->nodes_) {
     std::vector<ge::Operator *> node_input_op;
     for (auto in_tensor : node->in_tensors()) {
@@ -94,6 +98,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
         data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
         subgraph_input_op_.push_back(*data);
         node_input_op.push_back(data);
+        op_buffer_.push_back(data);
         continue;
       }
@@ -130,6 +135,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
           auto weight_tensor = mindspore::lite::ConverterToNPUTensor(in_tensor);
           weight_const->set_attr_value(weight_tensor);
           node_input_op.push_back(weight_const);
+          op_buffer_.push_back(weight_const);
         }
       }
     }
@@ -140,6 +146,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
       return RET_ERROR;
     }
   }
+
   return RET_OK;
 }
@@ -176,17 +183,18 @@ std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }
 
 int SubGraphNpuKernel::Init() {
   if (!is_compiled_) {
-    name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
+    name_ = "kNpuSubGraph" + std::to_string(npu_manager_->index());
     auto model_buffer_data = BuildIRModel();
     if (model_buffer_data == nullptr) {
       MS_LOG(ERROR) << "Build IR model failed.";
       return RET_ERROR;
     }
-    mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data, GetOMModelName(),
-                                                         context_->GetNpuInfo().frequency_);
+    MS_ASSERT(npu_manager_ != nullptr);
+    npu_manager_->AddModel(model_buffer_data, GetOMModelName(), context_->GetNpuInfo().frequency_);
 
-    executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName());
+    executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName(), npu_manager_);
     if (executor_ == nullptr) {
       MS_LOG(ERROR) << "Create NPUExecutor failed.";
@@ -18,10 +18,14 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
 #include <vector>
 #include <string>
+#include <memory>
 #include "include/hiai_ir_build.h"
 #include "src/sub_graph_kernel.h"
 #include "src/runtime/agent/npu/npu_executor.h"
 #include "include/graph/op/all_ops.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
@@ -31,8 +35,8 @@ class SubGraphNpuKernel : public SubGraphKernel {
   SubGraphNpuKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                     const std::vector<kernel::LiteKernel *> &inKernels,
                     const std::vector<kernel::LiteKernel *> &outKernels, const std::vector<kernel::LiteKernel *> &nodes,
-                    const lite::InnerContext *ctx = nullptr)
-      : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) {
+                    const lite::InnerContext *ctx = nullptr, lite::NPUManager *npu_manager = nullptr)
+      : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx), npu_manager_(npu_manager) {
     subgraph_type_ = kNpuSubGraph;
   }
@@ -56,7 +60,7 @@ class SubGraphNpuKernel : public SubGraphKernel {
   }
 
  private:
-  domi::ModelBufferData *BuildIRModel();
+  std::shared_ptr<domi::ModelBufferData> BuildIRModel();
 
   int BuildNPUInputOp();
@@ -71,11 +75,15 @@ class SubGraphNpuKernel : public SubGraphKernel {
  private:
   bool is_compiled_ = false;
+  lite::NPUManager *npu_manager_ = nullptr;
 
   std::vector<ge::Operator> subgraph_input_op_;
   std::vector<ge::Operator> subgraph_output_op_;
   std::vector<lite::Tensor *> out_tensor_sorted_;
+  std::vector<ge::Operator *> op_buffer_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
@@ -42,13 +42,13 @@ int FullconnectionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inp
   for (int i = 1; i < input_shape.size(); i++) {
     col *= input_shape[i];
   }
-  auto reshape_op = new (std::nothrow) hiai::op::Const(name_ + "_reshape_data");
+  reshape_op_ = new (std::nothrow) hiai::op::Const(name_ + "_reshape_data");
   vector<int> reshape_data = {input_shape[0], col};
   ge::TensorDesc reshape_tensor_desc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_FLOAT);
   ge::TensorPtr reshape_tensor = std::make_shared<hiai::Tensor>(reshape_tensor_desc);
   reshape_tensor->SetData(reinterpret_cast<uint8_t *>(reshape_data.data()), 2 * sizeof(float));
-  reshape_op->set_attr_value(reshape_tensor);
-  reshape_->set_input_shape(*reshape_op);
+  reshape_op_->set_attr_value(reshape_tensor);
+  reshape_->set_input_shape(*reshape_op_);
 
   fc_ = new (std::nothrow) hiai::op::MatMul(name_);
   if (fc_ == nullptr) {
@@ -117,6 +117,10 @@ FullconnectionNPUKernel::~FullconnectionNPUKernel() {
     delete biasadd_;
     biasadd_ = nullptr;
   }
+  if (reshape_op_ != nullptr) {
+    delete reshape_op_;
+    reshape_op_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_FullConnection, NPUKernelCreator<FullconnectionNPUKernel>)
 }  // namespace mindspore::kernel
@@ -40,6 +40,7 @@ class FullconnectionNPUKernel : public ConvolutionBaseNPUKernel {
   hiai::op::Reshape *reshape_ = nullptr;
   hiai::op::MatMul *fc_ = nullptr;
   hiai::op::BiasAdd *biasadd_ = nullptr;
+  hiai::op::Const *reshape_op_ = nullptr;
   MatMulParameter *fc_param_;
 };
 }  // namespace mindspore::kernel
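This is the first of a family of identical leak fixes: a hiai::op::Const node was new'd locally inside SetNPUInputs and never freed, so it now becomes a member that the destructor deletes. The same fix recurs below for InstanceNorm, Pad, Resize, Split, and Unsqueeze. A condensed sketch of the recurring pattern, with stand-in types:

    #include <new>

    struct ConstOpT { /* stand-in for hiai::op::Const */ };

    class KernelT {
     public:
      int SetInputs() {
        reshape_op_ = new (std::nothrow) ConstOpT();  // was: auto reshape_op = new ... (leaked)
        if (reshape_op_ == nullptr) {
          return -1;
        }
        return 0;
      }
      ~KernelT() {
        if (reshape_op_ != nullptr) {  // matches the destructor blocks added below
          delete reshape_op_;
          reshape_op_ = nullptr;
        }
      }

     private:
      ConstOpT *reshape_op_ = nullptr;  // promoted from a local so cleanup is possible
    };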
@@ -39,11 +39,6 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
   }
   op_->set_input_x(*npu_inputs[0]);
 
-  auto gamma = new (std::nothrow) hiai::op::Const(name_ + "_gamma");
-  if (gamma == nullptr) {
-    MS_LOG(ERROR) << "New gamma const failed.";
-    return RET_ERROR;
-  }
   auto gamma_shape = inputs[1]->shape();
   std::shared_ptr<ge::Tensor> gamma_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
   if (gamma_tensor == nullptr) {
@@ -54,14 +49,14 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
                                    lite::ConverterToNPUDataType(inputs[1]->data_type()));
   gamma_tensor->SetTensorDesc(gamma_tensor_desc);
   gamma_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[1]->data_c()), inputs[1]->Size());
-  gamma->set_attr_value(gamma_tensor);
-  op_->set_input_gamma(*gamma);
-  auto beta = new (std::nothrow) hiai::op::Const(name_ + "_beta");
-  if (beta == nullptr) {
-    MS_LOG(ERROR) << "New beta const failed.";
+  gamma_ = new (std::nothrow) hiai::op::Const(name_ + "_gamma");
+  if (gamma_ == nullptr) {
+    MS_LOG(ERROR) << "New gamma_ const failed.";
     return RET_ERROR;
   }
+  gamma_->set_attr_value(gamma_tensor);
+  op_->set_input_gamma(*gamma_);
 
   auto beta_shape = inputs[2]->shape();
   std::shared_ptr<ge::Tensor> beta_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
   if (beta_tensor == nullptr) {
@@ -72,8 +67,13 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
                                   lite::ConverterToNPUDataType(inputs[2]->data_type()));
   beta_tensor->SetTensorDesc(beta_tensor_desc);
   beta_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[2]->data_c()), inputs[2]->Size());
-  beta->set_attr_value(beta_tensor);
-  op_->set_input_beta(*beta);
+  beta_ = new (std::nothrow) hiai::op::Const(name_ + "_beta");
+  if (beta_ == nullptr) {
+    MS_LOG(ERROR) << "New beta_ const failed.";
+    return RET_ERROR;
+  }
+  beta_->set_attr_value(beta_tensor);
+  op_->set_input_beta(*beta_);
   op_->set_attr_epsilon(instance_norm_param_->epsilon_);
   return RET_OK;
 }
@@ -85,6 +85,14 @@ InstanceNormNPUKernel::~InstanceNormNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (gamma_ != nullptr) {
+    delete gamma_;
+    gamma_ = nullptr;
+  }
+  if (beta_ != nullptr) {
+    delete beta_;
+    beta_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, NPUKernelCreator<InstanceNormNPUKernel>)
@@ -38,6 +38,8 @@ class InstanceNormNPUKernel : public NPUKernel {
  private:
   hiai::op::InstanceNorm *op_ = nullptr;
+  hiai::op::Const *gamma_ = nullptr;
+  hiai::op::Const *beta_ = nullptr;
   InstanceNormParameter *instance_norm_param_;
 };
 }  // namespace mindspore::kernel
@@ -54,10 +54,12 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
   if (!op_parameter->infer_flag_) {
     MS_LOG(ERROR) << "NPU does not support runtime inference shape. Type is:"
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
+    free(op_parameter);
     return nullptr;
   }
 
   if (inputs[0]->shape().size() > 4) {
     MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
+    free(op_parameter);
     return nullptr;
   }
   auto *kernel = new (std::nothrow) T(op_parameter, inputs, outputs, ctx);
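The free() calls imply the OpParameter handed to the creator is malloc-allocated and owned by the creator until a kernel takes it over, so both early returns previously leaked it. A sketch of the ownership convention this enforces, with stand-in types:

    #include <cstdlib>

    struct OpParam { bool infer_flag_; };
    struct KernelT {
      explicit KernelT(OpParam *p) : param_(p) {}
      OpParam *param_;
    };

    KernelT *CreateKernel(OpParam *param) {
      if (!param->infer_flag_) {
        free(param);  // previously leaked on this path
        return nullptr;
      }
      return new KernelT(param);  // the kernel owns the parameter from here on
    }

    int main() {
      auto *p = static_cast<OpParam *>(malloc(sizeof(OpParam)));
      p->infer_flag_ = false;
      CreateKernel(p);  // early-return path: param is freed inside
      return 0;
    }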
@@ -51,19 +51,19 @@ int PadNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
   ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr padding_tensor = std::make_shared<hiai::Tensor>(padding_tensor_desc);
   padding_tensor->SetData(reinterpret_cast<uint8_t *>(paddings_.data()), 2 * size * sizeof(int));
-  auto paddings = new hiai::op::Const(name_ + "paddings");
-  paddings->set_attr_value(padding_tensor);
+  hiai_paddings_ = new hiai::op::Const(name_ + "paddings");
+  hiai_paddings_->set_attr_value(padding_tensor);
 
   ge::TensorDesc constant_values_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
   ge::TensorPtr constant_values_tensor = std::make_shared<hiai::Tensor>(constant_values_tensor_desc);
   vector<float> constant_values_data_value = {param_->constant_value_};
   constant_values_tensor->SetData(reinterpret_cast<uint8_t *>(constant_values_data_value.data()), 1 * sizeof(float));
-  auto constant = new hiai::op::Const(name_ + "constant");
-  constant->set_attr_value(constant_values_tensor);
+  hiai_constant_ = new hiai::op::Const(name_ + "constant");
+  hiai_constant_->set_attr_value(constant_values_tensor);
 
   op_->set_input_x(*npu_inputs[0]);
-  op_->set_input_constant_values(*constant);
-  op_->set_input_paddings(*paddings);
+  op_->set_input_constant_values(*hiai_constant_);
+  op_->set_input_paddings(*hiai_paddings_);
   return RET_OK;
 }
@@ -75,6 +75,14 @@ PadNPUKernel::~PadNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (hiai_paddings_ != nullptr) {
+    delete hiai_paddings_;
+    hiai_paddings_ = nullptr;
+  }
+  if (hiai_constant_ != nullptr) {
+    delete hiai_constant_;
+    hiai_constant_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_PadFusion, NPUKernelCreator<PadNPUKernel>)
@@ -40,6 +40,8 @@ class PadNPUKernel : public NPUKernel {
   hiai::op::PadV2 *op_ = nullptr;
   PadParameter *param_;
   std::vector<int> paddings_;
+  hiai::op::Const *hiai_paddings_ = nullptr;
+  hiai::op::Const *hiai_constant_ = nullptr;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_PAD_NPU_H_
@@ -46,8 +46,8 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
   vector<int32_t> dataValue = {static_cast<int32_t>(resize_parameter_->new_height_),
                                static_cast<int32_t>(resize_parameter_->new_width_)};
   sizeTensor->SetData(reinterpret_cast<uint8_t *>(dataValue.data()), 2 * sizeof(int32_t));
-  auto out_size = new (std::nothrow) hiai::op::Const(name_ + "_size");
-  out_size->set_attr_value(sizeTensor);
+  out_size_ = new (std::nothrow) hiai::op::Const(name_ + "_size");
+  out_size_->set_attr_value(sizeTensor);
   if (resize_parameter_->method_ == schema::ResizeMethod_LINEAR) {
     auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_);
     if (op == nullptr) {
@@ -57,7 +57,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
     op->set_attr_align_corners(resize_parameter_->coordinate_transform_mode_ ==
                                schema::CoordinateTransformMode_ALIGN_CORNERS);
     op->set_input_x(*npu_inputs[0]);
-    op->set_input_size(*out_size);
+    op->set_input_size(*out_size_);
     op->set_attr_half_pixel_centers(resize_parameter_->preserve_aspect_ratio_);
     op_ = op;
   } else if (resize_parameter_->method_ == schema::ResizeMethod_NEAREST) {
@@ -69,7 +69,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
     op->set_attr_align_corners(resize_parameter_->coordinate_transform_mode_ ==
                                schema::CoordinateTransformMode_ALIGN_CORNERS);
     op->set_input_x(*npu_inputs[0]);
-    op->set_input_size(*out_size);
+    op->set_input_size(*out_size_);
     op_ = op;
   } else {
     MS_LOG(WARNING) << "Unsupported resize method type:" << resize_parameter_->method_;
@@ -85,6 +85,10 @@ ResizeNPUKernel::~ResizeNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (out_size_ != nullptr) {
+    delete out_size_;
+    out_size_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Resize, NPUKernelCreator<ResizeNPUKernel>)
 }  // namespace mindspore::kernel
@@ -40,6 +40,7 @@ class ResizeNPUKernel : public NPUKernel {
  private:
   ge::Operator *op_ = nullptr;
+  hiai::op::Const *out_size_ = nullptr;
   ResizeParameter *resize_parameter_;
 };
 }  // namespace mindspore::kernel
@@ -39,20 +39,20 @@ int SplitNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
   ge::TensorDesc size_splits_tensor_desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr size_splits_tensor = std::make_shared<hiai::Tensor>(size_splits_tensor_desc);
   size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(param_->split_sizes_), size * sizeof(int));
-  auto size_splits = new hiai::op::Const(name_ + "_size");
-  size_splits->set_attr_value(size_splits_tensor);
+  size_splits_ = new hiai::op::Const(name_ + "_size");
+  size_splits_->set_attr_value(size_splits_tensor);
 
   ge::TensorDesc split_dim_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr split_dim_tensor = std::make_shared<hiai::Tensor>(split_dim_tensor_desc);
   vector<int32_t> split_dim_data_value = {param_->split_dim_};
   split_dim_tensor->SetData(reinterpret_cast<uint8_t *>(split_dim_data_value.data()), 1 * sizeof(int));
-  auto split_dim = new hiai::op::Const(name_ + "_dim");
-  split_dim->set_attr_value(split_dim_tensor);
+  split_dim_ = new hiai::op::Const(name_ + "_dim");
+  split_dim_->set_attr_value(split_dim_tensor);
 
   op_->set_input_x(*npu_inputs[0]);
   op_->set_attr_num_split(param_->num_split_);
-  op_->set_input_split_dim(*split_dim);
-  op_->set_input_size_splits(*size_splits);
+  op_->set_input_split_dim(*split_dim_);
+  op_->set_input_size_splits(*size_splits_);
   op_->create_dynamic_output_y(param_->num_split_);
   return RET_OK;
 }
@@ -64,6 +64,14 @@ SplitNPUKernel::~SplitNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (size_splits_ != nullptr) {
+    delete size_splits_;
+    size_splits_ = nullptr;
+  }
+  if (split_dim_ != nullptr) {
+    delete split_dim_;
+    split_dim_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Split, NPUKernelCreator<SplitNPUKernel>)
@@ -39,6 +39,8 @@ class SplitNPUKernel : public NPUKernel {
  private:
   hiai::op::SplitV *op_ = nullptr;
   SplitParameter *param_;
+  hiai::op::Const *size_splits_ = nullptr;
+  hiai::op::Const *split_dim_ = nullptr;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SPLIT_NPU_H_
@@ -43,11 +43,11 @@ int UnsqueezeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
   ge::TensorDesc desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr tensor = std::make_shared<hiai::Tensor>(desc);
   tensor->SetData(reinterpret_cast<uint8_t *>(axis_.data()), size * sizeof(int));
-  auto axis = new hiai::op::Const(name_ + "_axis");
-  axis->set_attr_value(tensor);
+  axis_const_ = new hiai::op::Const(name_ + "_axis");
+  axis_const_->set_attr_value(tensor);
 
   op_->set_input_x(*npu_inputs[0]);
-  op_->set_input_axis(*axis);
+  op_->set_input_axis(*axis_const_);
   return RET_OK;
 }
@@ -59,6 +59,10 @@ UnsqueezeNPUKernel::~UnsqueezeNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (axis_const_ != nullptr) {
+    delete axis_const_;
+    axis_const_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Unsqueeze, NPUKernelCreator<UnsqueezeNPUKernel>)
@@ -39,6 +39,7 @@ class UnsqueezeNPUKernel : public NPUKernel {
  private:
   hiai::op::ExpandDims *op_ = nullptr;
+  hiai::op::Const *axis_const_ = nullptr;
   vector<int> axis_;
 };
 }  // namespace mindspore::kernel
@@ -539,8 +539,8 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
   }
   if (type == kernel::kNpuSubGraph) {
 #if SUPPORT_NPU
-    auto sub_kernel = new (std::nothrow)
-      kernel::SubGraphNpuKernel(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_);
+    auto sub_kernel = new (std::nothrow) kernel::SubGraphNpuKernel(input_tensors, output_tensors, input_kernels,
+                                                                   output_kernels, kernels, context_, npu_manager_);
     if (sub_kernel == nullptr) {
       MS_LOG(ERROR) << "NPU subgraph new failed.";
       return nullptr;
@@ -653,13 +653,14 @@ int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
     return RET_OK;
   }
   auto transform_pass = new NPUTransformPass(context_, dst_kernels, src_tensors_);
-  mindspore::lite::NPUPassManager::GetInstance()->AddPass(transform_pass);
+  MS_ASSERT(npu_pass_manager_ != nullptr);
+  npu_pass_manager_->AddPass(transform_pass);
   auto concat_format_pass = new NPUInsertTransformPass(context_, dst_kernels, src_tensors_);
-  mindspore::lite::NPUPassManager::GetInstance()->AddPass(concat_format_pass);
+  npu_pass_manager_->AddPass(concat_format_pass);
   auto fusion_pass = new NPUFusionPass(dst_kernels);
-  mindspore::lite::NPUPassManager::GetInstance()->AddPass(fusion_pass);
+  npu_pass_manager_->AddPass(fusion_pass);
-  ret = mindspore::lite::NPUPassManager::GetInstance()->Run();
+  ret = npu_pass_manager_->Run();
 #endif
   return ret;
 }
@@ -23,12 +23,24 @@
 #include "src/sub_graph_kernel.h"
 #include "src/inner_context.h"
 #include "include/model.h"
+#if SUPPORT_NPU
+#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
+#endif
 
 namespace mindspore::lite {
 class Scheduler {
  public:
   Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors)
       : context_(ctx), src_model_(src_model), src_tensors_(src_tensors) {}
+#if SUPPORT_NPU
+  Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors,
+            NPUManager *npu_manager = nullptr, NPUPassManager *npu_pass_manager = nullptr)
+      : context_(ctx),
+        src_model_(src_model),
+        src_tensors_(src_tensors),
+        npu_manager_(npu_manager),
+        npu_pass_manager_(npu_pass_manager) {}
+#endif
   ~Scheduler() = default;
 
   int Schedule(std::vector<kernel::LiteKernel *> *dst_kernels);
@@ -84,6 +96,10 @@ class Scheduler {
   const InnerContext *context_ = nullptr;
   Model *src_model_ = nullptr;
   std::vector<Tensor *> *src_tensors_;
+#if SUPPORT_NPU
+  NPUManager *npu_manager_ = nullptr;
+  NPUPassManager *npu_pass_manager_ = nullptr;
+#endif
   std::vector<size_t> graph_output_node_indexes_;
   std::map<int, OpParameter *> op_parameters_;
 };
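Taken together, the scheduler changes close the loop: under SUPPORT_NPU the session constructs the Scheduler with its manager pair, RunPass feeds the pass manager, and CreateSubGraphKernel threads the NPU manager into each subgraph kernel, which in turn hands it to its executor. A self-contained end-to-end sketch of that flow, with stand-in types in place of the real classes:

    struct NPUManagerT {};
    struct NPUPassManagerT {};

    struct SchedulerT {
      SchedulerT(NPUManagerT *m, NPUPassManagerT *pm) : manager_(m), pass_manager_(pm) {}
      NPUManagerT *manager_;       // forwarded to each SubGraphNpuKernel
      NPUPassManagerT *pass_manager_;  // used by RunPass
    };

    struct SessionT {
      void CompileGraph() {
        SchedulerT scheduler(&manager_, &pass_manager_);  // as in LiteSession::CompileGraph
        // scheduler.Schedule(...) would pass manager_ on to subgraph kernels / executors
      }
      NPUManagerT manager_;
      NPUPassManagerT pass_manager_;
    };

    int main() {
      SessionT a, b;  // two sessions schedule independently; no shared NPU state
      a.CompileGraph();
      b.CompileGraph();
      return 0;
    }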