@@ -28,6 +28,10 @@ set(API_SRC
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/tensor_impl.cc
)
if(SUPPORT_NPU)
include_directories(${DDK_PATH})
endif()
set(LITE_SRC
${API_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc
@@ -156,7 +160,6 @@ else()
endif()
if(SUPPORT_NPU)
add_subdirectory(runtime/agent/npu)
include_directories(${DDK_PATH})
target_link_libraries(mindspore-lite npu_kernel_mid)
target_link_libraries(mindspore-lite_static npu_kernel_mid)
endif()
@@ -31,6 +31,18 @@ InnerContext::InnerContext(const Context *context) {
}
}
#if SUPPORT_NPU
InnerContext::InnerContext(const Context *context, NPUManager *npu_manager) {
this->allocator = context->allocator;
this->thread_num_ = context->thread_num_;
this->device_list_.clear();
for (auto &device_ctx : context->device_list_) {
this->device_list_.push_back(device_ctx);
}
this->npu_manager_ = npu_manager;
}
#endif
int InnerContext::Init() {
if (RET_OK != this->IsValid()) {
MS_LOG(ERROR) << "Context is not valid";
@@ -120,10 +132,11 @@ bool InnerContext::IsGpuEnabled() const {
bool InnerContext::IsNpuEnabled() const {
#ifdef SUPPORT_NPU
MS_ASSERT(npu_manager_ != nullptr);
return this->device_list_.end() !=
std::find_if(this->device_list_.begin(), this->device_list_.end(),
[](const DeviceContext &device) { return device.device_type_ == DT_NPU; }) &&
mindspore::lite::NPUManager::GetInstance()->IsSupportNPU();
npu_manager_->IsSupportNPU();
#else
return false;
#endif
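
Note on the IsNpuEnabled() hunk: the device scan itself is unchanged; only the singleton lookup becomes a call through the injected npu_manager_. A std-only sketch of that scan, with stand-in types for the enums defined in include/context.h:

  #include <algorithm>
  #include <vector>
  enum DeviceType { DT_CPU, DT_GPU, DT_NPU };         // stand-ins, not the real enum
  struct DeviceContext { DeviceType device_type_; };
  bool HasNpuDevice(const std::vector<DeviceContext> &devices) {
    return devices.end() != std::find_if(devices.begin(), devices.end(),
                                         [](const DeviceContext &d) { return d.device_type_ == DT_NPU; });
  }
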
@@ -20,6 +20,9 @@
#include "include/context.h"
#include "src/runtime/runtime_api.h"
#include "src/runtime/allocator.h"
#ifdef SUPPORT_NPU
#include "src/runtime/agent/npu/npu_manager.h"
#endif
namespace mindspore::lite {
struct InnerContext : public Context {
@@ -30,7 +33,9 @@ struct InnerContext : public Context {
InnerContext() = default;
explicit InnerContext(const Context *context);
#if SUPPORT_NPU
InnerContext(const Context *context, NPUManager *npu_manager);
#endif
int Init();
bool IsCpuFloat16Enabled() const;
@@ -52,6 +57,12 @@ struct InnerContext : public Context {
int IsValid() const;
virtual ~InnerContext();
#if SUPPORT_NPU
private:
NPUManager *npu_manager_ = nullptr;
#endif
};
} // namespace mindspore::lite
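
One consistency point: this header guards the new constructor and member with `#if SUPPORT_NPU`, while inner_context.cc above uses `#ifdef SUPPORT_NPU`. The two agree as long as the build defines the macro to 1 (the usual -DSUPPORT_NPU=1), but an empty definition would make the `#if` form ill-formed. A minimal illustration of where they diverge, with a hypothetical local define standing in for the CMake flag:

  #define SUPPORT_NPU 1   // hypothetical; normally supplied by the build system
  #ifdef SUPPORT_NPU      // true whenever the macro is defined, even to 0 or empty
  #endif
  #if SUPPORT_NPU         // true only when the macro expands to a nonzero constant
  #endif
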
@@ -395,7 +395,11 @@ int LiteSession::CompileGraph(Model *model) {
return ret;
}
// scheduler kernels
#if SUPPORT_NPU
Scheduler scheduler(context_, model, &tensors_, npu_manager_, npu_pass_manager_);
#else
Scheduler scheduler(context_, model, &tensors_);
#endif
ret = scheduler.Schedule(&kernels_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
@@ -404,7 +408,8 @@ int LiteSession::CompileGraph(Model *model) {
}
#if SUPPORT_NPU
if (this->context_->IsNpuEnabled()) {
if (mindspore::lite::NPUManager::GetInstance()->LoadOMModel() != RET_OK) {
MS_ASSERT(npu_manager_ != nullptr);
if (npu_manager_->LoadOMModel() != RET_OK) {
MS_LOG(ERROR) << "NPU client load model failed.";
return RET_ERROR;
}
@@ -501,14 +506,30 @@ int LiteSession::Init(const Context *context) {
MS_LOG(ERROR) << "Not support multi-threading";
return RET_ERROR;
}
#if SUPPORT_NPU
npu_manager_ = new (std::nothrow) NPUManager();
if (npu_manager_ == nullptr) {
MS_LOG(ERROR) << "New npu_manager_ failed";
is_running_.store(false);
return RET_ERROR;
}
npu_pass_manager_ = new (std::nothrow) NPUPassManager();
if (npu_pass_manager_ == nullptr) {
MS_LOG(ERROR) << "New npu_pass_manager_ failed";
is_running_.store(false);
return RET_ERROR;
}
#endif
if (context == nullptr) {
MS_LOG(ERROR) << "context is nullptr";
is_running_.store(false);
return RET_NULL_PTR;
}
#if SUPPORT_NPU
this->context_ = new (std::nothrow) InnerContext(context, npu_manager_);
#else
this->context_ = new (std::nothrow) InnerContext(context);
#endif
if (this->context_ == nullptr) {
MS_LOG(ERROR) << "New Context failed";
is_running_.store(false);
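
The Init() hunk above establishes the ownership split the rest of the patch relies on: the session owns both managers, and everything downstream (the context, and later the scheduler and executor) only borrows raw pointers whose lifetime the session guarantees. A std-only sketch of that split, with hypothetical names:

  #include <memory>
  struct NPUManagerSketch {};
  struct ContextSketch {
    explicit ContextSketch(NPUManagerSketch *m) : npu_manager_(m) {}
    NPUManagerSketch *npu_manager_;  // borrowed; the owning session outlives this context
  };
  struct SessionSketch {
    std::unique_ptr<NPUManagerSketch> npu_manager_ = std::make_unique<NPUManagerSketch>();
    ContextSketch MakeContext() { return ContextSketch(npu_manager_.get()); }
  };
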
@@ -579,8 +600,12 @@ LiteSession::~LiteSession() {
delete this->executor_;
this->executor_ = nullptr;
#if SUPPORT_NPU
mindspore::lite::NPUPassManager::GetInstance()->Clear();
mindspore::lite::NPUManager::GetInstance()->Reset();
MS_ASSERT(npu_manager_ != nullptr);
MS_ASSERT(npu_pass_manager_ != nullptr);
npu_pass_manager_->Clear();
delete npu_pass_manager_;
npu_manager_->Reset();
delete npu_manager_;
#endif
#if GPU_OPENCL && !SUPPORT_TRAIN
delete opencl_runtime_wrapper_;
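
Caveat on the destructor hunk: it asserts both pointers are non-null, but if Init() fails between the two allocations, npu_pass_manager_ is still null when the session is destroyed, so the assert fires in debug builds. A hedged variant (not what the patch does) that tolerates partial initialization:

  #if SUPPORT_NPU
    if (npu_pass_manager_ != nullptr) {  // tolerate a failed Init()
      npu_pass_manager_->Clear();
      delete npu_pass_manager_;
      npu_pass_manager_ = nullptr;
    }
    if (npu_manager_ != nullptr) {
      npu_manager_->Reset();
      delete npu_manager_;
      npu_manager_ = nullptr;
    }
  #endif
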
@@ -31,6 +31,10 @@
#include "src/executor.h"
#include "src/tensor.h"
#include "src/tensorlist.h"
#if SUPPORT_NPU
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
#endif
#if GPU_OPENCL
#include "src/runtime/gpu/opencl/opencl_runtime.h"
#elif GPU_VULKAN
@@ -129,6 +133,10 @@ class LiteSession : public session::LiteSession {
Executor *executor_ = nullptr;
Model *model_ = nullptr;
std::atomic<bool> is_running_ = false;
#if SUPPORT_NPU
NPUManager *npu_manager_ = nullptr;
NPUPassManager *npu_pass_manager_ = nullptr;
#endif
#if GPU_OPENCL && !SUPPORT_TRAIN
opencl::OpenCLRuntimeWrapper *opencl_runtime_wrapper_{nullptr};
#elif GPU_VULKAN && !SUPPORT_TRAIN
@@ -21,12 +21,19 @@
namespace mindspore::lite {
NPUExecutor::~NPUExecutor() {
client_.reset();
for (auto t : npu_input_tensors_) {
t.reset();
}
npu_input_tensors_.clear();
for (auto t : npu_output_tensors_) {
t.reset();
}
npu_output_tensors_.clear();
}
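
Minor point on the new destructor loops: iterating by value copies each shared_ptr, so t.reset() releases the copy and leaves the vector's elements untouched; it is the clear() that follows which actually drops the references. A std-only demonstration:

  #include <memory>
  #include <vector>
  int main() {
    std::vector<std::shared_ptr<int>> v{std::make_shared<int>(1)};
    for (auto t : v) t.reset();  // resets a copy; v's elements still hold the int
    v.clear();                   // this is where the last references are released
  }
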
int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
MS_ASSERT(npu_manager_ != nullptr);
this->client_ = npu_manager_->GetClient(model_name_);
if (this->client_ == nullptr) {
MS_LOG(ERROR) << "client is nullptr.";
return RET_ERROR;
@@ -23,11 +23,15 @@
#include "src/executor.h"
#include "include/errorcode.h"
#include "include/HiAiModelManagerService.h"
#ifdef SUPPORT_NPU
#include "src/runtime/agent/npu/npu_manager.h"
#endif
namespace mindspore::lite {
class NPUExecutor : public Executor {
public:
explicit NPUExecutor(const std::string &model_name) { this->model_name_ = model_name; }
explicit NPUExecutor(const std::string &model_name, NPUManager *npu_manager = nullptr)
: model_name_(model_name), npu_manager_(npu_manager) {}
~NPUExecutor() override;
int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
@@ -45,6 +49,7 @@ class NPUExecutor : public Executor {
private:
std::string model_name_;
NPUManager *npu_manager_ = nullptr;
std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
std::vector<std::shared_ptr<hiai::AiTensor>> npu_input_tensors_;
std::vector<std::shared_ptr<hiai::AiTensor>> npu_output_tensors_;
@@ -57,25 +57,25 @@ bool NPUManager::CheckEMUIVersion() {
}
void NPUManager::Reset() {
for (auto client : clients_) {
client->UnLoadModel();
client.reset();
}
clients_.clear();
index_ = 0;
domi::HiaiIrBuild ir_build;
for (const auto &model_map : models_) {
auto model = model_map.second;
if (!model->is_freed_) {
ir_build.ReleaseModelBuff(*model->model_buffer_data_);
model->model_buffer_data_ = nullptr;
model->is_freed_ = true;
model->desc_.reset();
model->desc_ = nullptr;
model->client_.reset();
}
model->model_buffer_data_.reset();
model->desc_.reset();
model->client_.reset();
}
models_.clear();
for (auto client : clients_) {
client->UnLoadModel();
client.reset();
}
clients_.clear();
}
bool NPUManager::CheckDDKVersion() {
@@ -141,8 +141,9 @@ bool NPUManager::IsKirinChip() {
return false;
}
int NPUManager::AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency) {
auto model = new SubGraphModel(index_, model_name, model_buffer_data);
int NPUManager::AddModel(std::shared_ptr<domi::ModelBufferData> model_buffer_data, const std::string &model_name,
int frequency) {
auto model = std::make_shared<SubGraphModel>(index_, model_name, model_buffer_data);
auto desc = std::make_shared<hiai::AiModelDescription>(model_name, frequency, 0, 0, 0);
model->desc_ = desc;
models_.insert({model_name, model});
@@ -168,6 +169,7 @@ int NPUManager::LoadOMModel() {
std::vector<std::shared_ptr<hiai::AiModelDescription>> models_desc;
std::shared_ptr<hiai::AiModelMngerClient> client = nullptr;
std::shared_ptr<hiai::AiModelBuilder> mc_builder = nullptr;
std::unordered_map<std::shared_ptr<hiai::AiModelBuilder>, hiai::MemBuffer *> builder_buffer_map;
int total = 0;
for (const auto &model_map : models_) {
if (total % MAX_MODEL_NUM == 0) {
@@ -194,7 +196,8 @@ int NPUManager::LoadOMModel() {
MS_LOG(ERROR) << "NPU input memory buffer create failed.";
return RET_ERROR;
}
model->desc_->SetModelBuffer(model->model_buffer_data_->data, model->model_buffer_data_->length);
builder_buffer_map.insert({mc_builder, buffer});
model->desc_->SetModelBuffer(buffer->GetMemBufferData(), buffer->GetMemBufferSize());
if (models_desc.size() == MAX_MODEL_NUM) {
auto ret = LoadModel(client, models_desc);
if (ret != RET_ERROR) {
@@ -214,10 +217,17 @@ int NPUManager::LoadOMModel() {
models_desc.clear();
}
for (auto it : builder_buffer_map) {
it.first->MemBufferDestroy(it.second);
}
builder_buffer_map.clear();
return RET_OK;
}
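
The builder_buffer_map addition destroys every MemBuffer on the success path, pairing each buffer with the builder that created it. The early `return RET_ERROR` when buffer creation fails appears to leave previously created buffers alive, though; a hedged sketch of extending the same cleanup to that error path, using only calls already present in this hunk:

  if (buffer == nullptr) {
    MS_LOG(ERROR) << "NPU input memory buffer create failed.";
    for (auto it : builder_buffer_map) {      // hedged: reclaim what was already
      it.first->MemBufferDestroy(it.second);  // created before bailing out
    }
    return RET_ERROR;
  }
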
std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
if (models_.find(model_name) == models_.end() || models_[model_name] == nullptr) {
return nullptr;
}
return models_[model_name]->client_;
}
@@ -32,7 +32,7 @@ static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion};
struct SubGraphModel {
public:
SubGraphModel(int index, std::string model_name, domi::ModelBufferData *model_buffer_data)
SubGraphModel(int index, std::string model_name, std::shared_ptr<domi::ModelBufferData> model_buffer_data)
: index_(index), model_name_(std::move(model_name)), model_buffer_data_(model_buffer_data) {}
bool is_freed_ = false;
@@ -45,17 +45,14 @@ struct SubGraphModel {
};
class NPUManager {
public:
static NPUManager *GetInstance() {
static NPUManager manager;
return &manager;
}
NPUManager() = default;
~NPUManager() { Reset(); }
bool IsSupportNPU();
// provide to subgraph to add model.
int AddModel(std::shared_ptr<domi::ModelBufferData> model_buffer_data, const std::string &model_name, int frequency);
// scheduler to load om model.
int LoadOMModel();
@@ -85,7 +82,7 @@ class NPUManager {
int index_ = 0;
bool is_check_version_ = false;
bool is_support_ = false;
std::unordered_map<std::string, SubGraphModel *> models_;
std::unordered_map<std::string, std::shared_ptr<SubGraphModel>> models_;
std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
};
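
Storing shared_ptr<SubGraphModel> here is more than style: the old map held raw pointers from `new SubGraphModel(...)`, and models_.clear() in Reset() dropped them without freeing. A std-only reduction of the difference, with a hypothetical stand-in type:

  #include <memory>
  #include <string>
  #include <unordered_map>
  struct SubGraphModelSketch {};
  int main() {
    std::unordered_map<std::string, std::shared_ptr<SubGraphModelSketch>> models;
    models.insert({"model.om", std::make_shared<SubGraphModelSketch>()});
    models.clear();  // destroys the model; the raw-pointer map leaked it here
  }
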
@@ -36,12 +36,17 @@ using mindspore::lite::RET_OK;
SubGraphNpuKernel::~SubGraphNpuKernel() {
subgraph_input_op_.clear();
subgraph_output_op_.clear();
out_tensor_sorted_.clear();
for (auto op : op_buffer_) {
delete op;
}
if (executor_ != nullptr) {
delete executor_;
}
op_buffer_.clear();
}
domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
std::shared_ptr<domi::ModelBufferData> SubGraphNpuKernel::BuildIRModel() {
ge::Graph graph("NPUGraph");
auto ret = BuildNPUInputOp();
@@ -58,20 +63,18 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
ge::Model model(GetOMModelName(), mindspore::lite::Version());
model.SetGraph(graph);
domi::HiaiIrBuild ir_build;
auto om_model_buff = new (std::nothrow) domi::ModelBufferData;
auto om_model_buff = std::make_shared<domi::ModelBufferData>();
if (om_model_buff == nullptr) {
MS_LOG(ERROR) << "OM model buffer is nullptr.";
return nullptr;
}
if (!ir_build.CreateModelBuff(model, *om_model_buff)) {
MS_LOG(ERROR) << "Create model buffer failed.";
delete om_model_buff;
return nullptr;
}
if (!ir_build.BuildIRModel(model, *om_model_buff)) {
MS_LOG(ERROR) << "Build IR model failed.";
ir_build.ReleaseModelBuff(*om_model_buff);
delete om_model_buff;
return nullptr;
}
return om_model_buff;
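
Note on the make_shared switch: resetting a shared_ptr<domi::ModelBufferData> frees the struct itself, while the payload CreateModelBuff allocates inside it is only released via ReleaseModelBuff on the error path. If the payload still needs an explicit release on the success path too, a custom deleter would tie the two lifetimes together; a hedged sketch built from the DDK calls already used in this patch:

  #include <memory>
  #include <new>
  #include "include/hiai_ir_build.h"
  std::shared_ptr<domi::ModelBufferData> MakeOwnedModelBuffer() {
    return std::shared_ptr<domi::ModelBufferData>(
        new (std::nothrow) domi::ModelBufferData,
        [](domi::ModelBufferData *p) {
          if (p == nullptr) return;
          domi::HiaiIrBuild ir_build;
          ir_build.ReleaseModelBuff(*p);  // assumption: payload needs an explicit release
          delete p;
        });
  }
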
@@ -85,6 +88,7 @@ int SubGraphNpuKernel::Run() {
int SubGraphNpuKernel::BuildNPUInputOp() {
int count = 0;
subgraph_input_op_.clear();
op_buffer_.clear();
for (auto node : this->nodes_) {
std::vector<ge::Operator *> node_input_op;
for (auto in_tensor : node->in_tensors()) {
@@ -94,6 +98,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
subgraph_input_op_.push_back(*data);
node_input_op.push_back(data);
op_buffer_.push_back(data);
continue;
}
@@ -130,6 +135,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
auto weight_tensor = mindspore::lite::ConverterToNPUTensor(in_tensor);
weight_const->set_attr_value(weight_tensor);
node_input_op.push_back(weight_const);
op_buffer_.push_back(weight_const);
}
}
}
@@ -140,6 +146,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
return RET_ERROR;
}
}
return RET_OK;
}
@@ -176,17 +183,18 @@ std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }
int SubGraphNpuKernel::Init() {
if (!is_compiled_) {
name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
name_ = "kNpuSubGraph" + std::to_string(npu_manager_->index());
auto model_buffer_data = BuildIRModel();
if (model_buffer_data == nullptr) {
MS_LOG(ERROR) << "Build IR model failed.";
return RET_ERROR;
}
mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data, GetOMModelName(),
context_->GetNpuInfo().frequency_);
MS_ASSERT(npu_manager_ != nullptr);
npu_manager_->AddModel(model_buffer_data, GetOMModelName(), context_->GetNpuInfo().frequency_);
executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName());
executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName(), npu_manager_);
if (executor_ == nullptr) {
MS_LOG(ERROR) << "Create NPUExecutor failed.";
@@ -18,10 +18,14 @@
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
#include <vector>
#include <string>
#include <memory>
#include "include/hiai_ir_build.h"
#include "src/sub_graph_kernel.h"
#include "src/runtime/agent/npu/npu_executor.h"
#include "include/graph/op/all_ops.h"
#ifdef SUPPORT_NPU
#include "src/runtime/agent/npu/npu_manager.h"
#endif
namespace mindspore::kernel {
using mindspore::lite::RET_ERROR;
@@ -31,8 +35,8 @@ class SubGraphNpuKernel : public SubGraphKernel {
SubGraphNpuKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
const std::vector<kernel::LiteKernel *> &inKernels,
const std::vector<kernel::LiteKernel *> &outKernels, const std::vector<kernel::LiteKernel *> &nodes,
const lite::InnerContext *ctx = nullptr)
: SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) {
const lite::InnerContext *ctx = nullptr, lite::NPUManager *npu_manager = nullptr)
: SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx), npu_manager_(npu_manager) {
subgraph_type_ = kNpuSubGraph;
}
@@ -56,7 +60,7 @@ class SubGraphNpuKernel : public SubGraphKernel {
}
private:
domi::ModelBufferData *BuildIRModel();
std::shared_ptr<domi::ModelBufferData> BuildIRModel();
int BuildNPUInputOp();
@@ -71,11 +75,15 @@ class SubGraphNpuKernel : public SubGraphKernel {
private:
bool is_compiled_ = false;
lite::NPUManager *npu_manager_ = nullptr;
std::vector<ge::Operator> subgraph_input_op_;
std::vector<ge::Operator> subgraph_output_op_;
std::vector<lite::Tensor *> out_tensor_sorted_;
std::vector<ge::Operator *> op_buffer_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
@@ -42,13 +42,13 @@ int FullconnectionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inp
for (int i = 1; i < input_shape.size(); i++) {
col *= input_shape[i];
}
auto reshape_op = new (std::nothrow) hiai::op::Const(name_ + "_reshape_data");
reshape_op_ = new (std::nothrow) hiai::op::Const(name_ + "_reshape_data");
vector<int> reshape_data = {input_shape[0], col};
ge::TensorDesc reshape_tensor_desc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_FLOAT);
ge::TensorPtr reshape_tensor = std::make_shared<hiai::Tensor>(reshape_tensor_desc);
reshape_tensor->SetData(reinterpret_cast<uint8_t *>(reshape_data.data()), 2 * sizeof(float));
reshape_op->set_attr_value(reshape_tensor);
reshape_->set_input_shape(*reshape_op);
reshape_op_->set_attr_value(reshape_tensor);
reshape_->set_input_shape(*reshape_op_);
fc_ = new (std::nothrow) hiai::op::MatMul(name_);
if (fc_ == nullptr) {
@@ -117,6 +117,10 @@ FullconnectionNPUKernel::~FullconnectionNPUKernel() {
delete biasadd_;
biasadd_ = nullptr;
}
if (reshape_op_ != nullptr) {
delete reshape_op_;
reshape_op_ = nullptr;
}
}
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_FullConnection, NPUKernelCreator<FullconnectionNPUKernel>)
} // namespace mindspore::kernel
@@ -40,6 +40,7 @@ class FullconnectionNPUKernel : public ConvolutionBaseNPUKernel {
hiai::op::Reshape *reshape_ = nullptr;
hiai::op::MatMul *fc_ = nullptr;
hiai::op::BiasAdd *biasadd_ = nullptr;
hiai::op::Const *reshape_op_ = nullptr;
MatMulParameter *fc_param_;
};
} // namespace mindspore::kernel
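
The kernel hunks above and below (Fullconnection, InstanceNorm, Pad, Resize, Split, Unsqueeze) all apply the same fix: a hiai::op::Const created as a local, and previously leaked, becomes a member that the destructor deletes. One thing the Fullconnection hunk still omits is a nullptr check after the nothrow new before reshape_op_ is dereferenced. A std-only sketch of the shape of the fix, with hypothetical names:

  #include <new>
  struct ConstOpSketch {};
  class KernelSketch {
   public:
    int SetInputs() {
      reshape_op_ = new (std::nothrow) ConstOpSketch();
      if (reshape_op_ == nullptr) {  // the check the diff omits
        return -1;
      }
      return 0;
    }
    ~KernelSketch() {
      delete reshape_op_;  // deleting nullptr is a no-op, so no guard is required
      reshape_op_ = nullptr;
    }
   private:
    ConstOpSketch *reshape_op_ = nullptr;  // stand-in for hiai::op::Const
  };
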
@@ -39,11 +39,6 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
}
op_->set_input_x(*npu_inputs[0]);
auto gamma = new (std::nothrow) hiai::op::Const(name_ + "_gamma");
if (gamma == nullptr) {
MS_LOG(ERROR) << "New gamma const failed.";
return RET_ERROR;
}
auto gamma_shape = inputs[1]->shape();
std::shared_ptr<ge::Tensor> gamma_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
if (gamma_tensor == nullptr) {
@@ -54,14 +49,14 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
lite::ConverterToNPUDataType(inputs[1]->data_type()));
gamma_tensor->SetTensorDesc(gamma_tensor_desc);
gamma_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[1]->data_c()), inputs[1]->Size());
gamma->set_attr_value(gamma_tensor);
op_->set_input_gamma(*gamma);
auto beta = new (std::nothrow) hiai::op::Const(name_ + "_beta");
if (beta == nullptr) {
MS_LOG(ERROR) << "New beta const failed.";
gamma_ = new (std::nothrow) hiai::op::Const(name_ + "_gamma");
if (gamma_ == nullptr) {
MS_LOG(ERROR) << "New gamma_ const failed.";
return RET_ERROR;
}
gamma_->set_attr_value(gamma_tensor);
op_->set_input_gamma(*gamma_);
auto beta_shape = inputs[2]->shape();
std::shared_ptr<ge::Tensor> beta_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
if (beta_tensor == nullptr) {
@@ -72,8 +67,13 @@ int InstanceNormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
lite::ConverterToNPUDataType(inputs[2]->data_type()));
beta_tensor->SetTensorDesc(beta_tensor_desc);
beta_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[2]->data_c()), inputs[2]->Size());
beta->set_attr_value(beta_tensor);
op_->set_input_beta(*beta);
beta_ = new (std::nothrow) hiai::op::Const(name_ + "_beta");
if (beta_ == nullptr) {
MS_LOG(ERROR) << "New beta_ const failed.";
return RET_ERROR;
}
beta_->set_attr_value(beta_tensor);
op_->set_input_beta(*beta_);
op_->set_attr_epsilon(instance_norm_param_->epsilon_);
return RET_OK;
}
@@ -85,6 +85,14 @@ InstanceNormNPUKernel::~InstanceNormNPUKernel() {
delete op_;
op_ = nullptr;
}
if (gamma_ != nullptr) {
delete gamma_;
gamma_ = nullptr;
}
if (beta_ != nullptr) {
delete beta_;
beta_ = nullptr;
}
}
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, NPUKernelCreator<InstanceNormNPUKernel>)
@@ -38,6 +38,8 @@ class InstanceNormNPUKernel : public NPUKernel {
private:
hiai::op::InstanceNorm *op_ = nullptr;
hiai::op::Const *gamma_ = nullptr;
hiai::op::Const *beta_ = nullptr;
InstanceNormParameter *instance_norm_param_;
};
} // namespace mindspore::kernel
@@ -54,10 +54,12 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
if (!op_parameter->infer_flag_) {
MS_LOG(ERROR) << "NPU does not support runtime inference shape. Type is:"
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
free(op_parameter);
return nullptr;
}
if (inputs[0]->shape().size() > 4) {
MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
free(op_parameter);
return nullptr;
}
auto *kernel = new (std::nothrow) T(op_parameter, inputs, outputs, ctx);
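
The two added free(op_parameter) calls plug leaks on the rejected-kernel paths; OpParameter comes from malloc-based populate functions, so free() rather than delete is required. A std-only reduction of the pattern:

  #include <cstdlib>
  struct OpParameter { int type_; };
  // The creator owns op_parameter until it either hands it to a kernel or
  // rejects the request; every rejection path must free() it.
  void *CreateKernel(bool supported) {
    auto *op_parameter = static_cast<OpParameter *>(malloc(sizeof(OpParameter)));
    if (op_parameter == nullptr) return nullptr;
    if (!supported) {
      free(op_parameter);  // mirrors the two free() calls added above
      return nullptr;
    }
    return op_parameter;  // ownership passes to the caller / kernel
  }
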
@@ -51,19 +51,19 @@ int PadNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr padding_tensor = std::make_shared<hiai::Tensor>(padding_tensor_desc);
padding_tensor->SetData(reinterpret_cast<uint8_t *>(paddings_.data()), 2 * size * sizeof(int));
auto paddings = new hiai::op::Const(name_ + "paddings");
paddings->set_attr_value(padding_tensor);
hiai_paddings_ = new hiai::op::Const(name_ + "paddings");
hiai_paddings_->set_attr_value(padding_tensor);
ge::TensorDesc constant_values_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
ge::TensorPtr constant_values_tensor = std::make_shared<hiai::Tensor>(constant_values_tensor_desc);
vector<float> constant_values_data_value = {param_->constant_value_};
constant_values_tensor->SetData(reinterpret_cast<uint8_t *>(constant_values_data_value.data()), 1 * sizeof(float));
auto constant = new hiai::op::Const(name_ + "constant");
constant->set_attr_value(constant_values_tensor);
hiai_constant_ = new hiai::op::Const(name_ + "constant");
hiai_constant_->set_attr_value(constant_values_tensor);
op_->set_input_x(*npu_inputs[0]);
op_->set_input_constant_values(*constant);
op_->set_input_paddings(*paddings);
op_->set_input_constant_values(*hiai_constant_);
op_->set_input_paddings(*hiai_paddings_);
return RET_OK;
}
@@ -75,6 +75,14 @@ PadNPUKernel::~PadNPUKernel() {
delete op_;
op_ = nullptr;
}
if (hiai_paddings_ != nullptr) {
delete hiai_paddings_;
hiai_paddings_ = nullptr;
}
if (hiai_constant_ != nullptr) {
delete hiai_constant_;
hiai_constant_ = nullptr;
}
}
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_PadFusion, NPUKernelCreator<PadNPUKernel>)
@@ -40,6 +40,8 @@ class PadNPUKernel : public NPUKernel {
hiai::op::PadV2 *op_ = nullptr;
PadParameter *param_;
std::vector<int> paddings_;
hiai::op::Const *hiai_paddings_ = nullptr;
hiai::op::Const *hiai_constant_ = nullptr;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_PAD_NPU_H_
@@ -46,8 +46,8 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
vector<int32_t> dataValue = {static_cast<int32_t>(resize_parameter_->new_height_),
static_cast<int32_t>(resize_parameter_->new_width_)};
sizeTensor->SetData(reinterpret_cast<uint8_t *>(dataValue.data()), 2 * sizeof(int32_t));
auto out_size = new (std::nothrow) hiai::op::Const(name_ + "_size");
out_size->set_attr_value(sizeTensor);
out_size_ = new (std::nothrow) hiai::op::Const(name_ + "_size");
out_size_->set_attr_value(sizeTensor);
if (resize_parameter_->method_ == schema::ResizeMethod_LINEAR) {
auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_);
if (op == nullptr) {
@@ -57,7 +57,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
op->set_attr_align_corners(resize_parameter_->coordinate_transform_mode_ ==
schema::CoordinateTransformMode_ALIGN_CORNERS);
op->set_input_x(*npu_inputs[0]);
op->set_input_size(*out_size);
op->set_input_size(*out_size_);
op->set_attr_half_pixel_centers(resize_parameter_->preserve_aspect_ratio_);
op_ = op;
} else if (resize_parameter_->method_ == schema::ResizeMethod_NEAREST) {
@@ -69,7 +69,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
op->set_attr_align_corners(resize_parameter_->coordinate_transform_mode_ ==
schema::CoordinateTransformMode_ALIGN_CORNERS);
op->set_input_x(*npu_inputs[0]);
op->set_input_size(*out_size);
op->set_input_size(*out_size_);
op_ = op;
} else {
MS_LOG(WARNING) << "Unsupported resize method type:" << resize_parameter_->method_;
@@ -85,6 +85,10 @@ ResizeNPUKernel::~ResizeNPUKernel() {
delete op_;
op_ = nullptr;
}
if (out_size_ != nullptr) {
delete out_size_;
out_size_ = nullptr;
}
}
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Resize, NPUKernelCreator<ResizeNPUKernel>)
} // namespace mindspore::kernel
@@ -40,6 +40,7 @@ class ResizeNPUKernel : public NPUKernel {
private:
ge::Operator *op_ = nullptr;
hiai::op::Const *out_size_ = nullptr;
ResizeParameter *resize_parameter_;
};
} // namespace mindspore::kernel
@@ -39,20 +39,20 @@ int SplitNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
ge::TensorDesc size_splits_tensor_desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr size_splits_tensor = std::make_shared<hiai::Tensor>(size_splits_tensor_desc);
size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(param_->split_sizes_), size * sizeof(int));
auto size_splits = new hiai::op::Const(name_ + "_size");
size_splits->set_attr_value(size_splits_tensor);
size_splits_ = new hiai::op::Const(name_ + "_size");
size_splits_->set_attr_value(size_splits_tensor);
ge::TensorDesc split_dim_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr split_dim_tensor = std::make_shared<hiai::Tensor>(split_dim_tensor_desc);
vector<int32_t> split_dim_data_value = {param_->split_dim_};
split_dim_tensor->SetData(reinterpret_cast<uint8_t *>(split_dim_data_value.data()), 1 * sizeof(int));
auto split_dim = new hiai::op::Const(name_ + "_dim");
split_dim->set_attr_value(split_dim_tensor);
split_dim_ = new hiai::op::Const(name_ + "_dim");
split_dim_->set_attr_value(split_dim_tensor);
op_->set_input_x(*npu_inputs[0]);
op_->set_attr_num_split(param_->num_split_);
op_->set_input_split_dim(*split_dim);
op_->set_input_size_splits(*size_splits);
op_->set_input_split_dim(*split_dim_);
op_->set_input_size_splits(*size_splits_);
op_->create_dynamic_output_y(param_->num_split_);
return RET_OK;
}
@@ -64,6 +64,14 @@ SplitNPUKernel::~SplitNPUKernel() {
delete op_;
op_ = nullptr;
}
if (size_splits_ != nullptr) {
delete size_splits_;
size_splits_ = nullptr;
}
if (split_dim_ != nullptr) {
delete split_dim_;
split_dim_ = nullptr;
}
}
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Split, NPUKernelCreator<SplitNPUKernel>)
@@ -39,6 +39,8 @@ class SplitNPUKernel : public NPUKernel {
private:
hiai::op::SplitV *op_ = nullptr;
SplitParameter *param_;
hiai::op::Const *size_splits_ = nullptr;
hiai::op::Const *split_dim_ = nullptr;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SPLIT_NPU_H_
@@ -43,11 +43,11 @@ int UnsqueezeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
ge::TensorDesc desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr tensor = std::make_shared<hiai::Tensor>(desc);
tensor->SetData(reinterpret_cast<uint8_t *>(axis_.data()), size * sizeof(int));
auto axis = new hiai::op::Const(name_ + "_axis");
axis->set_attr_value(tensor);
axis_const_ = new hiai::op::Const(name_ + "_axis");
axis_const_->set_attr_value(tensor);
op_->set_input_x(*npu_inputs[0]);
op_->set_input_axis(*axis);
op_->set_input_axis(*axis_const_);
return RET_OK;
}
@@ -59,6 +59,10 @@ UnsqueezeNPUKernel::~UnsqueezeNPUKernel() {
delete op_;
op_ = nullptr;
}
if (axis_const_ != nullptr) {
delete axis_const_;
axis_const_ = nullptr;
}
}
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Unsqueeze, NPUKernelCreator<UnsqueezeNPUKernel>)
@@ -39,6 +39,7 @@ class UnsqueezeNPUKernel : public NPUKernel {
private:
hiai::op::ExpandDims *op_ = nullptr;
hiai::op::Const *axis_const_ = nullptr;
vector<int> axis_;
};
} // namespace mindspore::kernel
@@ -539,8 +539,8 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
}
if (type == kernel::kNpuSubGraph) {
#if SUPPORT_NPU
auto sub_kernel = new (std::nothrow)
kernel::SubGraphNpuKernel(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_);
auto sub_kernel = new (std::nothrow) kernel::SubGraphNpuKernel(input_tensors, output_tensors, input_kernels,
output_kernels, kernels, context_, npu_manager_);
if (sub_kernel == nullptr) {
MS_LOG(ERROR) << "NPU subgraph new failed.";
return nullptr;
@@ -653,13 +653,14 @@ int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
return RET_OK;
}
auto transform_pass = new NPUTransformPass(context_, dst_kernels, src_tensors_);
mindspore::lite::NPUPassManager::GetInstance()->AddPass(transform_pass);
MS_ASSERT(npu_pass_manager_ != nullptr);
npu_pass_manager_->AddPass(transform_pass);
auto concat_format_pass = new NPUInsertTransformPass(context_, dst_kernels, src_tensors_);
mindspore::lite::NPUPassManager::GetInstance()->AddPass(concat_format_pass);
npu_pass_manager_->AddPass(concat_format_pass);
auto fusion_pass = new NPUFusionPass(dst_kernels);
mindspore::lite::NPUPassManager::GetInstance()->AddPass(fusion_pass);
npu_pass_manager_->AddPass(fusion_pass);
ret = mindspore::lite::NPUPassManager::GetInstance()->Run();
ret = npu_pass_manager_->Run();
#endif
return ret;
}
@@ -23,12 +23,24 @@
#include "src/sub_graph_kernel.h"
#include "src/inner_context.h"
#include "include/model.h"
#if SUPPORT_NPU
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
#endif
namespace mindspore::lite {
class Scheduler {
public:
Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors)
: context_(ctx), src_model_(src_model), src_tensors_(src_tensors) {}
#if SUPPORT_NPU
Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors,
NPUManager *npu_manager = nullptr, NPUPassManager *npu_pass_manager = nullptr)
: context_(ctx),
src_model_(src_model),
src_tensors_(src_tensors),
npu_manager_(npu_manager),
npu_pass_manager_(npu_pass_manager) {}
#endif
~Scheduler() = default;
int Schedule(std::vector<kernel::LiteKernel *> *dst_kernels);
@@ -84,6 +96,10 @@ class Scheduler {
const InnerContext *context_ = nullptr;
Model *src_model_ = nullptr;
std::vector<Tensor *> *src_tensors_;
#if SUPPORT_NPU
NPUManager *npu_manager_ = nullptr;
NPUPassManager *npu_pass_manager_ = nullptr;
#endif
std::vector<size_t> graph_output_node_indexes_;
std::map<int, OpParameter *> op_parameters_;
};
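
One caveat on the constructor pair: if the three-argument constructor is kept alongside the NPU overload when SUPPORT_NPU is on, the overload's defaulted trailing parameters mean a three-argument Scheduler(...) call would match both signatures and fail to compile as ambiguous. Dropping the defaults keeps the overloads disjoint (the lite_session.cc hunk always passes all five arguments anyway). A std-only reduction:

  struct Widget {
    Widget(int) {}
    Widget(int, int * = nullptr) {}  // the defaulted tail makes Widget(1) match both
  };
  // Widget w(1);  // error: call to constructor of 'Widget' is ambiguous
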