diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt
index 2c5822306e..4925ffe13d 100644
--- a/mindspore/lite/src/CMakeLists.txt
+++ b/mindspore/lite/src/CMakeLists.txt
@@ -28,6 +28,10 @@ set(API_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/tensor_impl.cc
         )
+if(SUPPORT_NPU)
+    include_directories(${DDK_PATH})
+endif()
+
 set(LITE_SRC
     ${API_SRC}
     ${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc
@@ -157,7 +161,6 @@ else()
 endif()
 if(SUPPORT_NPU)
     add_subdirectory(runtime/agent/npu)
-    include_directories(${DDK_PATH})
     target_link_libraries(mindspore-lite npu_kernel_mid)
     target_link_libraries(mindspore-lite_static npu_kernel_mid)
 endif()
diff --git a/mindspore/lite/src/inner_context.cc b/mindspore/lite/src/inner_context.cc
index 6f84c03056..33866be80b 100644
--- a/mindspore/lite/src/inner_context.cc
+++ b/mindspore/lite/src/inner_context.cc
@@ -31,6 +31,18 @@ InnerContext::InnerContext(const Context *context) {
   }
 }
 
+#if SUPPORT_NPU
+InnerContext::InnerContext(const Context *context, NPUManager *npu_manager) {
+  this->allocator = context->allocator;
+  this->thread_num_ = context->thread_num_;
+  this->device_list_.clear();
+  for (auto &device_ctx : context->device_list_) {
+    this->device_list_.push_back(device_ctx);
+  }
+  this->npu_manager_ = npu_manager;
+}
+#endif
+
 int InnerContext::Init() {
   if (RET_OK != this->IsValid()) {
     MS_LOG(ERROR) << "Context is not valid";
@@ -120,10 +132,11 @@ bool InnerContext::IsGpuEnabled() const {
 
 bool InnerContext::IsNpuEnabled() const {
 #ifdef SUPPORT_NPU
+  MS_ASSERT(npu_manager_ != nullptr);
   return this->device_list_.end() !=
            std::find_if(this->device_list_.begin(), this->device_list_.end(),
                         [](const DeviceContext &device) { return device.device_type_ == DT_NPU; }) &&
-         mindspore::lite::NPUManager::GetInstance()->IsSupportNPU();
+         npu_manager_->IsSupportNPU();
 #else
   return false;
 #endif
diff --git a/mindspore/lite/src/inner_context.h b/mindspore/lite/src/inner_context.h
index 7a19826d3c..3a5f18182d 100644
--- a/mindspore/lite/src/inner_context.h
+++ b/mindspore/lite/src/inner_context.h
@@ -20,6 +20,9 @@
 #include "include/context.h"
 #include "src/runtime/runtime_api.h"
 #include "src/runtime/allocator.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore::lite {
 struct InnerContext : public Context {
@@ -30,7 +33,9 @@ struct InnerContext : public Context {
   InnerContext() = default;
 
   explicit InnerContext(const Context *context);
-
+#if SUPPORT_NPU
+  InnerContext(const Context *context, NPUManager *npu_manager);
+#endif
   int Init();
 
   bool IsCpuFloat16Enabled() const;
@@ -52,6 +57,12 @@ struct InnerContext : public Context {
   int IsValid() const;
 
   virtual ~InnerContext();
+
+#if SUPPORT_NPU
+
+ private:
+  NPUManager *npu_manager_ = nullptr;
+#endif
 };
 }  // namespace mindspore::lite
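The hunks above replace hidden singleton access (`NPUManager::GetInstance()`) with an `NPUManager *` that the caller injects into `InnerContext`. A minimal standalone sketch of that dependency-injection move, with purely illustrative class names (`Manager` and `Ctx` are not MindSpore types):

```cpp
// Sketch only: a context that used to reach a global singleton now receives
// the manager instance it needs from its owner.
#include <iostream>

class Manager {
 public:
  bool IsSupport() const { return supported_; }
  void set_supported(bool s) { supported_ = s; }

 private:
  bool supported_ = false;
};

class Ctx {
 public:
  // Before: the constructor body would call Manager::GetInstance().
  // After: the caller owns the Manager and injects it, so two sessions can
  // hold two independent managers instead of sharing hidden global state.
  explicit Ctx(Manager *manager) : manager_(manager) {}
  bool IsNpuEnabled() const { return manager_ != nullptr && manager_->IsSupport(); }

 private:
  Manager *manager_ = nullptr;  // not owned
};

int main() {
  Manager m;
  m.set_supported(true);
  Ctx ctx(&m);
  std::cout << std::boolalpha << ctx.IsNpuEnabled() << '\n';  // true
  return 0;
}
```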
diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc
index 4875231935..b78b97479e 100644
--- a/mindspore/lite/src/lite_session.cc
+++ b/mindspore/lite/src/lite_session.cc
@@ -395,7 +395,11 @@ int LiteSession::CompileGraph(Model *model) {
     return ret;
   }
   // scheduler kernels
+#if SUPPORT_NPU
+  Scheduler scheduler(context_, model, &tensors_, npu_manager_, npu_pass_manager_);
+#else
   Scheduler scheduler(context_, model, &tensors_);
+#endif
   ret = scheduler.Schedule(&kernels_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
@@ -404,7 +408,8 @@ int LiteSession::CompileGraph(Model *model) {
   }
 #if SUPPORT_NPU
   if (this->context_->IsNpuEnabled()) {
-    if (mindspore::lite::NPUManager::GetInstance()->LoadOMModel() != RET_OK) {
+    MS_ASSERT(npu_manager_ != nullptr);
+    if (npu_manager_->LoadOMModel() != RET_OK) {
       MS_LOG(ERROR) << "NPU client load model failed.";
       return RET_ERROR;
     }
@@ -501,14 +506,30 @@ int LiteSession::Init(const Context *context) {
     MS_LOG(ERROR) << "Not support multi-threading";
     return RET_ERROR;
   }
-
+#if SUPPORT_NPU
+  npu_manager_ = new (std::nothrow) NPUManager();
+  if (npu_manager_ == nullptr) {
+    MS_LOG(ERROR) << "New npu_manager_ failed";
+    is_running_.store(false);
+    return RET_ERROR;
+  }
+  npu_pass_manager_ = new (std::nothrow) NPUPassManager();
+  if (npu_pass_manager_ == nullptr) {
+    MS_LOG(ERROR) << "New npu_pass_manager_ failed";
+    is_running_.store(false);
+    return RET_ERROR;
+  }
+#endif
   if (context == nullptr) {
     MS_LOG(ERROR) << "context is nullptr";
     is_running_.store(false);
     return RET_NULL_PTR;
   }
-
+#if SUPPORT_NPU
+  this->context_ = new (std::nothrow) InnerContext(context, npu_manager_);
+#else
   this->context_ = new (std::nothrow) InnerContext(context);
+#endif
   if (this->context_ == nullptr) {
     MS_LOG(ERROR) << "New Context failed";
     is_running_.store(false);
@@ -579,8 +600,12 @@ LiteSession::~LiteSession() {
   delete this->executor_;
   this->executor_ = nullptr;
 #if SUPPORT_NPU
-  mindspore::lite::NPUPassManager::GetInstance()->Clear();
-  mindspore::lite::NPUManager::GetInstance()->Reset();
+  MS_ASSERT(npu_manager_ != nullptr);
+  MS_ASSERT(npu_pass_manager_ != nullptr);
+  npu_pass_manager_->Clear();
+  delete npu_pass_manager_;
+  npu_manager_->Reset();
+  delete npu_manager_;
 #endif
 #if GPU_OPENCL && !SUPPORT_TRAIN
   delete opencl_runtime_wrapper_;
diff --git a/mindspore/lite/src/lite_session.h b/mindspore/lite/src/lite_session.h
index 181a6a9378..4baf6dbc7e 100644
--- a/mindspore/lite/src/lite_session.h
+++ b/mindspore/lite/src/lite_session.h
@@ -31,6 +31,10 @@
 #include "src/executor.h"
 #include "src/tensor.h"
 #include "src/tensorlist.h"
+#if SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
+#endif
 #if GPU_OPENCL
 #include "src/runtime/gpu/opencl/opencl_runtime.h"
 #elif GPU_VULKAN
@@ -129,6 +133,10 @@ class LiteSession : public session::LiteSession {
   Executor *executor_ = nullptr;
   Model *model_ = nullptr;
   std::atomic<bool> is_running_ = false;
+#if SUPPORT_NPU
+  NPUManager *npu_manager_ = nullptr;
+  NPUPassManager *npu_pass_manager_ = nullptr;
+#endif
 #if GPU_OPENCL && !SUPPORT_TRAIN
   opencl::OpenCLRuntimeWrapper *opencl_runtime_wrapper_{nullptr};
 #elif GPU_VULKAN && !SUPPORT_TRAIN
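`LiteSession` now owns the two managers: `Init` allocates them before the context is built, and the destructor tears them down (`Clear`/`Reset`, then `delete`). A sketch of the same ownership shape using `std::unique_ptr` instead of the patch's raw `new`/`delete`; `Session`, `Manager`, and `PassManager` are stand-ins, not the real classes:

```cpp
// Sketch under the assumption that the managers are session-scoped: smart
// pointers express the same lifetime the patch manages by hand.
#include <memory>

struct Manager { void Reset() {} };
struct PassManager { void Clear() {} };

class Session {
 public:
  int Init() {
    manager_ = std::make_unique<Manager>();
    pass_manager_ = std::make_unique<PassManager>();
    // The context would be built next and keep a non-owning Manager*.
    return 0;
  }
  ~Session() {
    // Same order as the patch: drop pass state first, then reset and free
    // the manager that kernels and the context point into.
    if (pass_manager_ != nullptr) pass_manager_->Clear();
    pass_manager_.reset();
    if (manager_ != nullptr) manager_->Reset();
    manager_.reset();
  }

 private:
  std::unique_ptr<Manager> manager_;
  std::unique_ptr<PassManager> pass_manager_;
};

int main() {
  Session s;
  return s.Init();
}
```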
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
index b88f5df9f8..ea079dc3ef 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
@@ -21,12 +21,19 @@
 namespace mindspore::lite {
 NPUExecutor::~NPUExecutor() {
   client_.reset();
+  for (auto t : npu_input_tensors_) {
+    t.reset();
+  }
   npu_input_tensors_.clear();
+  for (auto t : npu_output_tensors_) {
+    t.reset();
+  }
   npu_output_tensors_.clear();
 }
 
 int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
-  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
+  MS_ASSERT(npu_manager_ != nullptr);
+  this->client_ = npu_manager_->GetClient(model_name_);
   if (this->client_ == nullptr) {
     MS_LOG(ERROR) << "client is nullptr.";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.h b/mindspore/lite/src/runtime/agent/npu/npu_executor.h
index 5ecdb94e8f..ad325f2934 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.h
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.h
@@ -23,11 +23,15 @@
 #include "src/executor.h"
 #include "include/errorcode.h"
 #include "include/HiAiModelManagerService.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore::lite {
 class NPUExecutor : public Executor {
  public:
-  explicit NPUExecutor(const std::string &model_name) { this->model_name_ = model_name; }
+  explicit NPUExecutor(const std::string &model_name, NPUManager *npu_manager = nullptr)
+      : model_name_(model_name), npu_manager_(npu_manager) {}
   ~NPUExecutor() override;
   int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
@@ -45,6 +49,7 @@ class NPUExecutor : public Executor {
 
  private:
   std::string model_name_;
+  NPUManager *npu_manager_ = nullptr;
   std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
   std::vector<std::shared_ptr<hiai::AiTensor>> npu_input_tensors_;
   std::vector<std::shared_ptr<hiai::AiTensor>> npu_output_tensors_;
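The executor follows the same pattern: the manager arrives through a defaulted constructor parameter and is only dereferenced in `Prepare`, behind an assert. A self-contained sketch of that constructor injection; `Executor`, `Manager`, and `Client` are illustrative stand-ins, not the hiai types:

```cpp
// Constructor injection with a defaulted parameter, mirroring the NPUExecutor
// change above. The default keeps old call sites compiling while new call
// sites pass the owning manager explicitly.
#include <cassert>
#include <memory>
#include <string>

struct Client {};

class Manager {
 public:
  std::shared_ptr<Client> GetClient(const std::string &name) {
    return std::make_shared<Client>();  // stand-in for the real model lookup
  }
};

class Executor {
 public:
  explicit Executor(const std::string &model_name, Manager *manager = nullptr)
      : model_name_(model_name), manager_(manager) {}

  int Prepare() {
    assert(manager_ != nullptr);  // mirrors MS_ASSERT(npu_manager_ != nullptr)
    client_ = manager_->GetClient(model_name_);
    return client_ != nullptr ? 0 : -1;
  }

 private:
  std::string model_name_;
  Manager *manager_ = nullptr;  // not owned
  std::shared_ptr<Client> client_;
};

int main() {
  Manager m;
  Executor e("model.om", &m);
  return e.Prepare();
}
```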
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_manager.cc b/mindspore/lite/src/runtime/agent/npu/npu_manager.cc
index 7f0092ac3c..3fabcb6845 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_manager.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_manager.cc
@@ -57,25 +57,25 @@ bool NPUManager::CheckEMUIVersion() {
 }
 
 void NPUManager::Reset() {
+  for (auto client : clients_) {
+    client->UnLoadModel();
+    client.reset();
+  }
+  clients_.clear();
+  index_ = 0;
   domi::HiaiIrBuild ir_build;
   for (const auto &model_map : models_) {
     auto model = model_map.second;
     if (!model->is_freed_) {
       ir_build.ReleaseModelBuff(*model->model_buffer_data_);
-      model->model_buffer_data_ = nullptr;
       model->is_freed_ = true;
-      model->desc_.reset();
-      model->desc_ = nullptr;
-      model->client_.reset();
     }
+    model->model_buffer_data_.reset();
+    model->desc_.reset();
+    model->client_.reset();
   }
   models_.clear();
-  for (auto client : clients_) {
-    client->UnLoadModel();
-    client.reset();
-  }
-  clients_.clear();
 }
 
 bool NPUManager::CheckDDKVersion() {
@@ -141,8 +141,9 @@ bool NPUManager::IsKirinChip() {
   return false;
 }
 
-int NPUManager::AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency) {
-  auto model = new SubGraphModel(index_, model_name, model_buffer_data);
+int NPUManager::AddModel(std::shared_ptr<domi::ModelBufferData> model_buffer_data, const std::string &model_name,
+                         int frequency) {
+  auto model = std::make_shared<SubGraphModel>(index_, model_name, model_buffer_data);
   auto desc = std::make_shared<hiai::AiModelDescription>(model_name, frequency, 0, 0, 0);
   model->desc_ = desc;
   models_.insert({model_name, model});
@@ -168,6 +169,7 @@ int NPUManager::LoadOMModel() {
   std::vector<std::shared_ptr<hiai::AiModelDescription>> models_desc;
   std::shared_ptr<hiai::AiModelMngerClient> client = nullptr;
   std::shared_ptr<hiai::AiModelBuilder> mc_builder = nullptr;
+  std::unordered_map<std::shared_ptr<hiai::AiModelBuilder>, hiai::MemBuffer *> builder_buffer_map;
   int total = 0;
   for (const auto &model_map : models_) {
     if (total % MAX_MODEL_NUM == 0) {
@@ -194,7 +196,8 @@ int NPUManager::LoadOMModel() {
       MS_LOG(ERROR) << "NPU input memory buffer create failed.";
       return RET_ERROR;
     }
-    model->desc_->SetModelBuffer(model->model_buffer_data_->data, model->model_buffer_data_->length);
+    builder_buffer_map.insert({mc_builder, buffer});
+    model->desc_->SetModelBuffer(buffer->GetMemBufferData(), buffer->GetMemBufferSize());
     if (models_desc.size() == MAX_MODEL_NUM) {
       auto ret = LoadModel(client, models_desc);
       if (ret != RET_ERROR) {
@@ -214,10 +217,17 @@ int NPUManager::LoadOMModel() {
     models_desc.clear();
   }
 
+  for (auto it : builder_buffer_map) {
+    it.first->MemBufferDestroy(it.second);
+  }
+  builder_buffer_map.clear();
   return RET_OK;
 }
 
 std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
+  if (models_.find(model_name) == models_.end() || models_[model_name] == nullptr) {
+    return nullptr;
+  }
   return models_[model_name]->client_;
 }
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_manager.h b/mindspore/lite/src/runtime/agent/npu/npu_manager.h
index 776a6534ef..b52c1bcc5d 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_manager.h
+++ b/mindspore/lite/src/runtime/agent/npu/npu_manager.h
@@ -32,7 +32,7 @@ static std::set<schema::PrimitiveType> npu_trans_nodes = {
   schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion};
 struct SubGraphModel {
  public:
-  SubGraphModel(int index, std::string model_name, domi::ModelBufferData *model_buffer_data)
+  SubGraphModel(int index, std::string model_name, std::shared_ptr<domi::ModelBufferData> model_buffer_data)
       : index_(index), model_name_(std::move(model_name)), model_buffer_data_(model_buffer_data) {}
 
   bool is_freed_ = false;
@@ -45,17 +45,14 @@ struct SubGraphModel {
 };
 class NPUManager {
  public:
-  static NPUManager *GetInstance() {
-    static NPUManager manager;
-    return &manager;
-  }
+  NPUManager() = default;
 
   ~NPUManager() { Reset(); }
 
   bool IsSupportNPU();
 
   // provide to subgraph to add model.
-  int AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency);
+  int AddModel(std::shared_ptr<domi::ModelBufferData> model_buffer_data, const std::string &model_name, int frequency);
 
   // scheduler to load om model.
   int LoadOMModel();
@@ -85,7 +82,7 @@ class NPUManager {
   int index_ = 0;
   bool is_check_version_ = false;
   bool is_support_ = false;
-  std::unordered_map<std::string, SubGraphModel *> models_;
+  std::unordered_map<std::string, std::shared_ptr<SubGraphModel>> models_;
   std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
 };
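`Reset` now unloads clients before touching the models, `shared_ptr` members replace the manual buffer frees, and `GetClient` no longer crashes on unknown model names. One detail worth noting: the patch still calls `models_[model_name]` after the guard, and `operator[]` default-constructs a missing entry; a `find`-based lookup avoids both the double lookup and that side effect. A small sketch of that variant (the `Model` struct is a stand-in):

```cpp
// Guarded map lookup in the spirit of the GetClient fix; find() keeps the
// map const and never inserts a default-constructed entry.
#include <memory>
#include <string>
#include <unordered_map>

struct Model {
  std::shared_ptr<int> client_;  // stand-in for the real client handle
};

std::shared_ptr<int> GetClient(const std::unordered_map<std::string, std::shared_ptr<Model>> &models,
                               const std::string &name) {
  auto it = models.find(name);
  if (it == models.end() || it->second == nullptr) {
    return nullptr;
  }
  return it->second->client_;
}

int main() {
  std::unordered_map<std::string, std::shared_ptr<Model>> models;
  models["net.om"] = std::make_shared<Model>();
  return GetClient(models, "missing.om") == nullptr ? 0 : 1;  // no insertion, no crash
}
```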
diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
index 3c103c47bd..d4bce54220 100644
--- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
+++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
@@ -36,12 +36,17 @@ using mindspore::lite::RET_OK;
 SubGraphNpuKernel::~SubGraphNpuKernel() {
   subgraph_input_op_.clear();
   subgraph_output_op_.clear();
+  out_tensor_sorted_.clear();
+  for (auto op : op_buffer_) {
+    delete op;
+  }
   if (executor_ != nullptr) {
     delete executor_;
   }
+  op_buffer_.clear();
 }
 
-domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
+std::shared_ptr<domi::ModelBufferData> SubGraphNpuKernel::BuildIRModel() {
   ge::Graph graph("NPUGraph");
 
   auto ret = BuildNPUInputOp();
@@ -58,20 +63,18 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
   ge::Model model(GetOMModelName(), mindspore::lite::Version());
   model.SetGraph(graph);
   domi::HiaiIrBuild ir_build;
-  auto om_model_buff = new (std::nothrow) domi::ModelBufferData;
+  auto om_model_buff = std::make_shared<domi::ModelBufferData>();
   if (om_model_buff == nullptr) {
     MS_LOG(ERROR) << "OM model buffer is nullptr.";
     return nullptr;
   }
   if (!ir_build.CreateModelBuff(model, *om_model_buff)) {
     MS_LOG(ERROR) << "Create model buffer failed.";
-    delete om_model_buff;
     return nullptr;
   }
   if (!ir_build.BuildIRModel(model, *om_model_buff)) {
     MS_LOG(ERROR) << "Build IR model failed.";
     ir_build.ReleaseModelBuff(*om_model_buff);
-    delete om_model_buff;
     return nullptr;
   }
   return om_model_buff;
@@ -85,6 +88,7 @@ int SubGraphNpuKernel::Run() {
 int SubGraphNpuKernel::BuildNPUInputOp() {
   int count = 0;
   subgraph_input_op_.clear();
+  op_buffer_.clear();
   for (auto node : this->nodes_) {
     std::vector<ge::Operator *> node_input_op;
     for (auto in_tensor : node->in_tensors()) {
@@ -94,6 +98,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
         data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
         subgraph_input_op_.push_back(*data);
         node_input_op.push_back(data);
+        op_buffer_.push_back(data);
         continue;
       }
@@ -130,6 +135,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
           auto weight_tensor = mindspore::lite::ConverterToNPUTensor(in_tensor);
           weight_const->set_attr_value(weight_tensor);
           node_input_op.push_back(weight_const);
+          op_buffer_.push_back(weight_const);
         }
       }
     }
@@ -140,6 +146,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
       return RET_ERROR;
     }
   }
+
   return RET_OK;
 }
@@ -176,17 +183,18 @@ std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }
 
 int SubGraphNpuKernel::Init() {
   if (!is_compiled_) {
-    name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
+    name_ = "kNpuSubGraph" + std::to_string(npu_manager_->index());
     auto model_buffer_data = BuildIRModel();
     if (model_buffer_data == nullptr) {
       MS_LOG(ERROR) << "Build IR model failed.";
       return RET_ERROR;
     }
 
-    mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data, GetOMModelName(),
-                                                         context_->GetNpuInfo().frequency_);
+    MS_ASSERT(npu_manager_ != nullptr);
+
+    npu_manager_->AddModel(model_buffer_data, GetOMModelName(), context_->GetNpuInfo().frequency_);
 
-    executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName());
+    executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName(), npu_manager_);
     if (executor_ == nullptr) {
       MS_LOG(ERROR) << "Create NPUExecutor failed.";
diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h
index c97e4a450c..490afbbe94 100644
--- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h
+++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h
@@ -18,10 +18,14 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
 #include <vector>
 #include <string>
+#include <memory>
 #include "include/hiai_ir_build.h"
 #include "src/sub_graph_kernel.h"
 #include "src/runtime/agent/npu/npu_executor.h"
 #include "include/graph/op/all_ops.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
@@ -31,8 +35,8 @@ class SubGraphNpuKernel : public SubGraphKernel {
   SubGraphNpuKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                     const std::vector<kernel::LiteKernel *> &inKernels,
                     const std::vector<kernel::LiteKernel *> &outKernels,
                     const std::vector<kernel::LiteKernel *> &nodes,
-                    const lite::InnerContext *ctx = nullptr)
-      : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) {
+                    const lite::InnerContext *ctx = nullptr, lite::NPUManager *npu_manager = nullptr)
+      : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx), npu_manager_(npu_manager) {
     subgraph_type_ = kNpuSubGraph;
   }
 
@@ -56,7 +60,7 @@ class SubGraphNpuKernel : public SubGraphKernel {
   }
 
  private:
-  domi::ModelBufferData *BuildIRModel();
+  std::shared_ptr<domi::ModelBufferData> BuildIRModel();
 
   int BuildNPUInputOp();
 
@@ -71,11 +75,15 @@ class SubGraphNpuKernel : public SubGraphKernel {
 
  private:
   bool is_compiled_ = false;
+
+  lite::NPUManager *npu_manager_ = nullptr;
 
   std::vector<ge::Operator> subgraph_input_op_;
 
   std::vector<ge::Operator> subgraph_output_op_;
 
   std::vector<lite::Tensor *> out_tensor_sorted_;
+
+  std::vector<ge::Operator *> op_buffer_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
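The subgraph kernel previously leaked every `hiai::op` object it allocated while building the IR graph (the graph only copies the operators); it now records them in `op_buffer_` and frees them in the destructor. The idea in isolation, with illustrative names (`GraphBuilder`, `Op` are not the real types):

```cpp
// The op_buffer_ idea: every operator allocated while building the graph is
// also recorded in one vector, and the destructor frees them all in one place.
#include <string>
#include <vector>

struct Op {
  explicit Op(std::string name) : name_(std::move(name)) {}
  std::string name_;
};

class GraphBuilder {
 public:
  ~GraphBuilder() {
    for (auto *op : op_buffer_) {
      delete op;
    }
    op_buffer_.clear();
  }

  Op *NewOp(const std::string &name) {
    auto *op = new Op(name);
    op_buffer_.push_back(op);  // ownership stays here; callers get a raw pointer
    return op;
  }

 private:
  std::vector<Op *> op_buffer_;
};

int main() {
  GraphBuilder builder;
  builder.NewOp("input");
  builder.NewOp("weight");
  return 0;  // both Ops are freed by ~GraphBuilder
}
```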
diff --git a/mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.cc b/mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.cc
index 2655fcb3b0..56b46114bd 100644
--- a/mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.cc
@@ -42,13 +42,13 @@ int FullconnectionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
   for (int i = 1; i < input_shape.size(); i++) {
     col *= input_shape[i];
   }
-  auto reshape_op = new (std::nothrow) hiai::op::Const(name_ + "_reshape_data");
+  reshape_op_ = new (std::nothrow) hiai::op::Const(name_ + "_reshape_data");
   vector<float> reshape_data = {input_shape[0], col};
   ge::TensorDesc reshape_tensor_desc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_FLOAT);
   ge::TensorPtr reshape_tensor = std::make_shared<ge::Tensor>(reshape_tensor_desc);
   reshape_tensor->SetData(reinterpret_cast<uint8_t *>(reshape_data.data()), 2 * sizeof(float));
-  reshape_op->set_attr_value(reshape_tensor);
-  reshape_->set_input_shape(*reshape_op);
+  reshape_op_->set_attr_value(reshape_tensor);
+  reshape_->set_input_shape(*reshape_op_);
 
   fc_ = new (std::nothrow) hiai::op::MatMul(name_);
   if (fc_ == nullptr) {
@@ -117,6 +117,10 @@ FullconnectionNPUKernel::~FullconnectionNPUKernel() {
     delete biasadd_;
     biasadd_ = nullptr;
   }
+  if (reshape_op_ != nullptr) {
+    delete reshape_op_;
+    reshape_op_ = nullptr;
+  }
 }
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_FullConnection, NPUKernelCreator<FullconnectionNPUKernel>)
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.h b/mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.h
index 699c0d6a6c..061abb545e 100644
--- a/mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/fullconnection_npu.h
@@ -40,6 +40,7 @@ class FullconnectionNPUKernel : public ConvolutionBaseNPUKernel {
   hiai::op::Reshape *reshape_ = nullptr;
   hiai::op::MatMul *fc_ = nullptr;
   hiai::op::BiasAdd *biasadd_ = nullptr;
+  hiai::op::Const *reshape_op_ = nullptr;
   MatMulParameter *fc_param_;
 };
 }  // namespace mindspore::kernel
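This is the first of several kernel fixes with the same shape: a `hiai::op::Const` that was created as a local (and never freed, because the ge graph only copies the operator it is attached to) is promoted to a member and deleted in the destructor. A minimal sketch of the pattern, with stand-in types:

```cpp
// The leak-fix pattern used by the NPU kernels above: a graph constant that
// was a leaked local becomes a member, freed once in the destructor.
// Kernel and ConstOp are stand-ins for the real hiai types.
struct ConstOp {};

class Kernel {
 public:
  void Build() {
    reshape_op_ = new ConstOp();  // was: auto reshape_op = new ConstOp(); (leaked)
    // ... attach reshape_op_ to the graph ...
  }
  ~Kernel() {
    if (reshape_op_ != nullptr) {
      delete reshape_op_;
      reshape_op_ = nullptr;  // consistent with the delete-and-null style of the patch
    }
  }

 private:
  ConstOp *reshape_op_ = nullptr;
};

int main() {
  Kernel k;
  k.Build();
  return 0;
}
```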
diff --git a/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.cc b/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.cc
index f0578dedec..36aec24fea 100644
--- a/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.cc
@@ -39,11 +39,6 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
   }
   op_->set_input_x(*npu_inputs[0]);
 
-  auto gamma = new (std::nothrow) hiai::op::Const(name_ + "_gamma");
-  if (gamma == nullptr) {
-    MS_LOG(ERROR) << "New gamma const failed.";
-    return RET_ERROR;
-  }
   auto gamma_shape = inputs[1]->shape();
   std::shared_ptr<ge::Tensor> gamma_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
   if (gamma_tensor == nullptr) {
@@ -54,14 +49,14 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                lite::ConverterToNPUDataType(inputs[1]->data_type()));
   gamma_tensor->SetTensorDesc(gamma_tensor_desc);
   gamma_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[1]->data_c()), inputs[1]->Size());
-  gamma->set_attr_value(gamma_tensor);
-  op_->set_input_gamma(*gamma);
-
-  auto beta = new (std::nothrow) hiai::op::Const(name_ + "_beta");
-  if (beta == nullptr) {
-    MS_LOG(ERROR) << "New beta const failed.";
+  gamma_ = new (std::nothrow) hiai::op::Const(name_ + "_gamma");
+  if (gamma_ == nullptr) {
+    MS_LOG(ERROR) << "New gamma_ const failed.";
     return RET_ERROR;
   }
+  gamma_->set_attr_value(gamma_tensor);
+  op_->set_input_gamma(*gamma_);
+
   auto beta_shape = inputs[2]->shape();
   std::shared_ptr<ge::Tensor> beta_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
   if (beta_tensor == nullptr) {
@@ -72,8 +67,13 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                               lite::ConverterToNPUDataType(inputs[2]->data_type()));
   beta_tensor->SetTensorDesc(beta_tensor_desc);
   beta_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[2]->data_c()), inputs[2]->Size());
-  beta->set_attr_value(beta_tensor);
-  op_->set_input_beta(*beta);
+  beta_ = new (std::nothrow) hiai::op::Const(name_ + "_beta");
+  if (beta_ == nullptr) {
+    MS_LOG(ERROR) << "New beta_ const failed.";
+    return RET_ERROR;
+  }
+  beta_->set_attr_value(beta_tensor);
+  op_->set_input_beta(*beta_);
   op_->set_attr_epsilon(instance_norm_param_->epsilon_);
   return RET_OK;
 }
@@ -85,6 +85,14 @@ InstanceNormNPUKernel::~InstanceNormNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (gamma_ != nullptr) {
+    delete gamma_;
+    gamma_ = nullptr;
+  }
+  if (beta_ != nullptr) {
+    delete beta_;
+    beta_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, NPUKernelCreator<InstanceNormNPUKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.h b/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.h
index b71701d044..ce44ce84bb 100644
--- a/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.h
@@ -38,6 +38,8 @@ class InstanceNormNPUKernel : public NPUKernel {
 
  private:
   hiai::op::InstanceNorm *op_ = nullptr;
+  hiai::op::Const *gamma_ = nullptr;
+  hiai::op::Const *beta_ = nullptr;
   InstanceNormParameter *instance_norm_param_;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h b/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h
index fc69df177b..77ec44ebae 100644
--- a/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h
+++ b/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h
@@ -54,10 +54,12 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
   if (!op_parameter->infer_flag_) {
     MS_LOG(ERROR) << "NPU does not support runtime inference shape. Type is:"
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
+    free(op_parameter);
     return nullptr;
   }
   if (inputs[0]->shape().size() > 4) {
     MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
+    free(op_parameter);
    return nullptr;
   }
   auto *kernel = new (std::nothrow) T(op_parameter, inputs, outputs, ctx);
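`NPUKernelCreator` receives a malloc'd `OpParameter` and owns it until a kernel takes it over, so both early-return paths now `free` it. A sketch of that factory-cleanup rule under the same ownership assumption (`OpParam` and `MakeKernel` are illustrative names):

```cpp
// Early-return cleanup as in the NPUKernelCreator fix: the factory owns the
// malloc'd parameter until it hands it to a kernel, so every failure path
// must free it or the parameter leaks.
#include <cstdlib>

struct OpParam {
  bool infer_flag_;
};

void *MakeKernel(OpParam *param, int input_dims) {
  if (!param->infer_flag_) {
    free(param);  // would otherwise leak on this path
    return nullptr;
  }
  if (input_dims > 4) {
    free(param);
    return nullptr;
  }
  return param;  // in the real code, ownership passes to the new kernel here
}

int main() {
  auto *param = static_cast<OpParam *>(malloc(sizeof(OpParam)));
  if (param == nullptr) return 1;
  param->infer_flag_ = false;
  return MakeKernel(param, 2) == nullptr ? 0 : 1;  // param already freed inside
}
```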
Type is:" << schema::EnumNamePrimitiveType(static_cast(op_parameter->type_)); + free(op_parameter); return nullptr; } if (inputs[0]->shape().size() > 4) { MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4"; + free(op_parameter); return nullptr; } auto *kernel = new (std::nothrow) T(op_parameter, inputs, outputs, ctx); diff --git a/mindspore/lite/src/runtime/kernel/npu/pad_npu.cc b/mindspore/lite/src/runtime/kernel/npu/pad_npu.cc index f6096f8b9a..3a3a504dd0 100644 --- a/mindspore/lite/src/runtime/kernel/npu/pad_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/pad_npu.cc @@ -51,19 +51,19 @@ int PadNPUKernel::SetNPUInputs(const std::vector &inputs, const ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32); ge::TensorPtr padding_tensor = std::make_shared(padding_tensor_desc); padding_tensor->SetData(reinterpret_cast(paddings_.data()), 2 * size * sizeof(int)); - auto paddings = new hiai::op::Const(name_ + "paddings"); - paddings->set_attr_value(padding_tensor); + hiai_paddings_ = new hiai::op::Const(name_ + "paddings"); + hiai_paddings_->set_attr_value(padding_tensor); ge::TensorDesc constant_values_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT); ge::TensorPtr constant_values_tensor = std::make_shared(constant_values_tensor_desc); vector constant_values_data_value = {param_->constant_value_}; constant_values_tensor->SetData(reinterpret_cast(constant_values_data_value.data()), 1 * sizeof(float)); - auto constant = new hiai::op::Const(name_ + "constant"); - constant->set_attr_value(constant_values_tensor); + hiai_constant_ = new hiai::op::Const(name_ + "constant"); + hiai_constant_->set_attr_value(constant_values_tensor); op_->set_input_x(*npu_inputs[0]); - op_->set_input_constant_values(*constant); - op_->set_input_paddings(*paddings); + op_->set_input_constant_values(*hiai_constant_); + op_->set_input_paddings(*hiai_paddings_); return RET_OK; } @@ -75,6 +75,14 @@ PadNPUKernel::~PadNPUKernel() { delete op_; op_ = nullptr; } + if (hiai_paddings_ != nullptr) { + delete hiai_paddings_; + hiai_paddings_ = nullptr; + } + if (hiai_constant_ != nullptr) { + delete hiai_constant_; + hiai_constant_ = nullptr; + } } REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_PadFusion, NPUKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/npu/pad_npu.h b/mindspore/lite/src/runtime/kernel/npu/pad_npu.h index 91c0447385..cb84903257 100644 --- a/mindspore/lite/src/runtime/kernel/npu/pad_npu.h +++ b/mindspore/lite/src/runtime/kernel/npu/pad_npu.h @@ -40,6 +40,8 @@ class PadNPUKernel : public NPUKernel { hiai::op::PadV2 *op_ = nullptr; PadParameter *param_; std::vector paddings_; + hiai::op::Const *hiai_paddings_ = nullptr; + hiai::op::Const *hiai_constant_ = nullptr; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_PAD_NPU_H_ diff --git a/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc b/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc index 3ec8b9b05a..74d7c8113c 100644 --- a/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc @@ -46,8 +46,8 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector &inputs, con vector dataValue = {static_cast(resize_parameter_->new_height_), static_cast(resize_parameter_->new_width_)}; sizeTensor->SetData(reinterpret_cast(dataValue.data()), 2 * sizeof(int32_t)); - auto out_size = new (std::nothrow) hiai::op::Const(name_ + "_size"); - out_size->set_attr_value(sizeTensor); + out_size_ = new (std::nothrow) 
hiai::op::Const(name_ + "_size"); + out_size_->set_attr_value(sizeTensor); if (resize_parameter_->method_ == schema::ResizeMethod_LINEAR) { auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_); if (op == nullptr) { @@ -57,7 +57,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector &inputs, con op->set_attr_align_corners(resize_parameter_->coordinate_transform_mode_ == schema::CoordinateTransformMode_ALIGN_CORNERS); op->set_input_x(*npu_inputs[0]); - op->set_input_size(*out_size); + op->set_input_size(*out_size_); op->set_attr_half_pixel_centers(resize_parameter_->preserve_aspect_ratio_); op_ = op; } else if (resize_parameter_->method_ == schema::ResizeMethod_NEAREST) { @@ -69,7 +69,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector &inputs, con op->set_attr_align_corners(resize_parameter_->coordinate_transform_mode_ == schema::CoordinateTransformMode_ALIGN_CORNERS); op->set_input_x(*npu_inputs[0]); - op->set_input_size(*out_size); + op->set_input_size(*out_size_); op_ = op; } else { MS_LOG(WARNING) << "Unsupported resize method type:" << resize_parameter_->method_; @@ -85,6 +85,10 @@ ResizeNPUKernel::~ResizeNPUKernel() { delete op_; op_ = nullptr; } + if (out_size_ != nullptr) { + delete out_size_; + out_size_ = nullptr; + } } REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Resize, NPUKernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/npu/resize_npu.h b/mindspore/lite/src/runtime/kernel/npu/resize_npu.h index 698462dd87..93b6704b6c 100644 --- a/mindspore/lite/src/runtime/kernel/npu/resize_npu.h +++ b/mindspore/lite/src/runtime/kernel/npu/resize_npu.h @@ -40,6 +40,7 @@ class ResizeNPUKernel : public NPUKernel { private: ge::Operator *op_ = nullptr; + hiai::op::Const *out_size_ = nullptr; ResizeParameter *resize_parameter_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/npu/split_npu.cc b/mindspore/lite/src/runtime/kernel/npu/split_npu.cc index c279bff7ff..5db38952ab 100644 --- a/mindspore/lite/src/runtime/kernel/npu/split_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/split_npu.cc @@ -39,20 +39,20 @@ int SplitNPUKernel::SetNPUInputs(const std::vector &inputs, cons ge::TensorDesc size_splits_tensor_desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32); ge::TensorPtr size_splits_tensor = std::make_shared(size_splits_tensor_desc); size_splits_tensor->SetData(reinterpret_cast(param_->split_sizes_), size * sizeof(int)); - auto size_splits = new hiai::op::Const(name_ + "_size"); - size_splits->set_attr_value(size_splits_tensor); + size_splits_ = new hiai::op::Const(name_ + "_size"); + size_splits_->set_attr_value(size_splits_tensor); ge::TensorDesc split_dim_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32); ge::TensorPtr split_dim_tensor = std::make_shared(split_dim_tensor_desc); vector split_dim_data_value = {param_->split_dim_}; split_dim_tensor->SetData(reinterpret_cast(split_dim_data_value.data()), 1 * sizeof(int)); - auto split_dim = new hiai::op::Const(name_ + "_dim"); - split_dim->set_attr_value(split_dim_tensor); + split_dim_ = new hiai::op::Const(name_ + "_dim"); + split_dim_->set_attr_value(split_dim_tensor); op_->set_input_x(*npu_inputs[0]); op_->set_attr_num_split(param_->num_split_); - op_->set_input_split_dim(*split_dim); - op_->set_input_size_splits(*size_splits); + op_->set_input_split_dim(*split_dim_); + op_->set_input_size_splits(*size_splits_); op_->create_dynamic_output_y(param_->num_split_); return RET_OK; } @@ -64,6 +64,14 @@ SplitNPUKernel::~SplitNPUKernel() 
     delete op_;
     op_ = nullptr;
   }
+  if (size_splits_ != nullptr) {
+    delete size_splits_;
+    size_splits_ = nullptr;
+  }
+  if (split_dim_ != nullptr) {
+    delete split_dim_;
+    split_dim_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Split, NPUKernelCreator<SplitNPUKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/npu/split_npu.h b/mindspore/lite/src/runtime/kernel/npu/split_npu.h
index f67700f4e9..1361c33c66 100644
--- a/mindspore/lite/src/runtime/kernel/npu/split_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/split_npu.h
@@ -39,6 +39,8 @@ class SplitNPUKernel : public NPUKernel {
  private:
   hiai::op::SplitV *op_ = nullptr;
   SplitParameter *param_;
+  hiai::op::Const *size_splits_ = nullptr;
+  hiai::op::Const *split_dim_ = nullptr;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SPLIT_NPU_H_
diff --git a/mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.cc b/mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.cc
index fdcdbdd3d1..1f36621a16 100644
--- a/mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.cc
@@ -43,11 +43,11 @@ int UnsqueezeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
   ge::TensorDesc desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr tensor = std::make_shared<ge::Tensor>(desc);
   tensor->SetData(reinterpret_cast<uint8_t *>(axis_.data()), size * sizeof(int));
-  auto axis = new hiai::op::Const(name_ + "_axis");
-  axis->set_attr_value(tensor);
+  axis_const_ = new hiai::op::Const(name_ + "_axis");
+  axis_const_->set_attr_value(tensor);
 
   op_->set_input_x(*npu_inputs[0]);
-  op_->set_input_axis(*axis);
+  op_->set_input_axis(*axis_const_);
   return RET_OK;
 }
@@ -59,6 +59,10 @@ UnsqueezeNPUKernel::~UnsqueezeNPUKernel() {
     delete op_;
     op_ = nullptr;
   }
+  if (axis_const_ != nullptr) {
+    delete axis_const_;
+    axis_const_ = nullptr;
+  }
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Unsqueeze, NPUKernelCreator<UnsqueezeNPUKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.h b/mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.h
index 8078fdf2c7..06f0883d8c 100644
--- a/mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/unsqueeze_npu.h
@@ -39,6 +39,7 @@ class UnsqueezeNPUKernel : public NPUKernel {
 
  private:
   hiai::op::ExpandDims *op_ = nullptr;
+  hiai::op::Const *axis_const_ = nullptr;
   vector<int> axis_;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc
index 65152bbedf..ca676f101e 100644
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -571,8 +571,8 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel::LiteKernel *> &kernels,
 int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
   int ret = RET_OK;
 #if SUPPORT_NPU
   if (!context_->IsNpuEnabled()) {
     return RET_OK;
   }
   auto transform_pass = new NPUTransformPass(context_, dst_kernels, src_tensors_);
-  mindspore::lite::NPUPassManager::GetInstance()->AddPass(transform_pass);
+  MS_ASSERT(npu_pass_manager_ != nullptr);
+  npu_pass_manager_->AddPass(transform_pass);
   auto concat_format_pass = new NPUInsertTransformPass(context_, dst_kernels, src_tensors_);
-  mindspore::lite::NPUPassManager::GetInstance()->AddPass(concat_format_pass);
+  npu_pass_manager_->AddPass(concat_format_pass);
   auto fusion_pass = new NPUFusionPass(dst_kernels);
-  mindspore::lite::NPUPassManager::GetInstance()->AddPass(fusion_pass);
+  npu_pass_manager_->AddPass(fusion_pass);
-  ret = mindspore::lite::NPUPassManager::GetInstance()->Run();
+  ret = npu_pass_manager_->Run();
 #endif
   return ret;
 }
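The scheduler now drives the NPU passes through the injected `NPUPassManager` instead of the singleton: passes are registered with `AddPass` and executed by `Run`. A compact sketch of that register-then-run pipeline; `Pass` and `PassManager` here are illustrative stand-ins, and the assumption that `Clear` owns and deletes the registered passes is mine, not confirmed by the diff:

```cpp
// Register-then-run pass pipeline, shaped like the scheduler hunk above.
#include <vector>

class Pass {
 public:
  virtual ~Pass() = default;
  virtual int Run() = 0;
};

class PassManager {
 public:
  void AddPass(Pass *pass) { passes_.push_back(pass); }  // assumed to take ownership
  int Run() {
    for (auto *pass : passes_) {
      int ret = pass->Run();
      if (ret != 0) return ret;  // stop at the first failing pass
    }
    return 0;
  }
  void Clear() {
    for (auto *pass : passes_) delete pass;
    passes_.clear();
  }
  ~PassManager() { Clear(); }

 private:
  std::vector<Pass *> passes_;
};

struct NoopPass : Pass {
  int Run() override { return 0; }
};

int main() {
  PassManager pm;  // in the patch this is the session-owned npu_pass_manager_
  pm.AddPass(new NoopPass());
  return pm.Run();
}
```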
diff --git a/mindspore/lite/src/scheduler.h b/mindspore/lite/src/scheduler.h
index 79e32fe3df..f9e47759d2 100644
--- a/mindspore/lite/src/scheduler.h
+++ b/mindspore/lite/src/scheduler.h
@@ -23,12 +23,24 @@
 #include "src/sub_graph_kernel.h"
 #include "src/inner_context.h"
 #include "include/model.h"
+#if SUPPORT_NPU
+#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
+#endif
 
 namespace mindspore::lite {
 class Scheduler {
  public:
   Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors)
       : context_(ctx), src_model_(src_model), src_tensors_(src_tensors) {}
+#if SUPPORT_NPU
+  Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors,
+            NPUManager *npu_manager = nullptr, NPUPassManager *npu_pass_manager = nullptr)
+      : context_(ctx),
+        src_model_(src_model),
+        src_tensors_(src_tensors),
+        npu_manager_(npu_manager),
+        npu_pass_manager_(npu_pass_manager) {}
+#endif
   ~Scheduler() = default;
 
   int Schedule(std::vector<kernel::LiteKernel *> *dst_kernels);
@@ -86,6 +98,10 @@ class Scheduler {
   const InnerContext *context_ = nullptr;
   Model *src_model_ = nullptr;
   std::vector<Tensor *> *src_tensors_;
+#if SUPPORT_NPU
+  NPUManager *npu_manager_ = nullptr;
+  NPUPassManager *npu_pass_manager_ = nullptr;
+#endif
   std::vector<size_t> graph_output_node_indexes_;
   std::map<int, OpParameter *> op_parameters_;
 };