From: @yeyunpeng2020 (tags/v1.1.0)
@@ -30,6 +30,7 @@
#include "src/runtime/kernel/arm/base/dequant.h"
#if SUPPORT_NPU
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
#endif
namespace mindspore {
@@ -366,7 +367,7 @@ int LiteSession::CompileGraph(Model *model) {
    return ret;
  }
  // scheduler kernels
  Scheduler scheduler(context_, model, tensors_);
  Scheduler scheduler(context_, model, &tensors_);
  ret = scheduler.Schedule(&kernels_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
@@ -537,6 +538,10 @@ LiteSession::~LiteSession() {
  delete this->context_;
  delete this->executor_;
  this->executor_ = nullptr;
#if SUPPORT_NPU
  mindspore::lite::NPUPassManager::GetInstance()->Clear();
  mindspore::lite::NPUManager::GetInstance()->Reset();
#endif
  is_running_.store(false);
}
@@ -1,9 +1,8 @@
include_directories(${DDK_PATH})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kernel)
file(GLOB_RECURSE NPU_RUNTIME_SRC
    ${CMAKE_CURRENT_SOURCE_DIR}/*.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/../../kernel/npu/*.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/optimizer/*.cc
    )
add_library(hiai SHARED IMPORTED)
set_target_properties(hiai PROPERTIES IMPORTED_LOCATION
@@ -17,6 +17,7 @@
#include "src/runtime/agent/npu/npu_executor.h"
#include "include/errorcode.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "nnacl/pack.h"
namespace mindspore::lite {
int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
@@ -32,6 +33,7 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
}
int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                     const std::vector<kernel::LiteKernel *> &out_kernels,
                     const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
                     const KernelCallBack &before, const KernelCallBack &after) {
  hiai::AiContext context;
@@ -63,14 +65,32 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
    return RET_ERROR;
  }
  // For output kernels of the entire model whose format is NCHW, the output tensors need to be converted from NCHW to NHWC.
  std::vector<Tensor *> trans_tensors;
  for (auto kernel : out_kernels) {
    if (kernel->out_kernels().empty() && npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
      for (int i = 0; i < kernel->out_tensors().size(); ++i) {
        trans_tensors.push_back(kernel->out_tensors()[i]);
      }
    }
  }
  for (int i = 0; i < npu_output_tensors_.size(); ++i) {
    void *data = out_tensors[i]->MutableData();
    if (data == nullptr) {
      MS_LOG(ERROR) << "Malloc buffer failed.";
      return RET_ERROR;
    }
    memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
    out_tensors[i]->ResetRefCount();
    if (std::find(trans_tensors.begin(), trans_tensors.end(), out_tensors[i]) != trans_tensors.end()) {
      // Change the data and the tensor shape from NCHW to NHWC.
      PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(),
                         out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel());
      out_tensors[i]->set_shape({out_tensors[i]->shape()[0], out_tensors[i]->shape()[2], out_tensors[i]->shape()[3],
                                 out_tensors[i]->shape()[1]});
    } else {
      memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
      out_tensors[i]->ResetRefCount();
    }
  }
  return RET_OK;
}
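The NCHW-to-NHWC conversion above delegates to PackNCHWToNHWCFp32 from nnacl/pack.h. As a reference for what that repack does, a minimal sketch of the index mapping (assuming dense float data; plane is width * height, matching the call site; the function name here is hypothetical):

// Sketch of an NCHW -> NHWC repack for float tensors; PackNCHWToNHWCFp32 is the real implementation.
void PackNCHWToNHWCSketch(const float *src, float *dst, int batch, int plane, int channel) {
  for (int n = 0; n < batch; ++n) {
    for (int hw = 0; hw < plane; ++hw) {
      for (int c = 0; c < channel; ++c) {
        // NCHW offset ((n * channel + c) * plane + hw) maps to NHWC offset ((n * plane + hw) * channel + c).
        dst[(n * plane + hw) * channel + c] = src[(n * channel + c) * plane + hw];
      }
    }
  }
}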
@@ -32,8 +32,9 @@ class NPUExecutor : public Executor {
  int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
  int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
          const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
          const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
          const std::vector<kernel::LiteKernel *> &out_kernels, const std::vector<kernel::LiteKernel *> &kernels,
          Allocator *allocator = nullptr, const KernelCallBack &before = nullptr,
          const KernelCallBack &after = nullptr);
 private:
  int GetIOTensorVec();
@@ -55,6 +55,26 @@ bool NPUManager::CheckEMUIVersion() {
  return true;
}
void NPUManager::Reset() {
  index_ = 0;
  domi::HiaiIrBuild ir_build;
  for (const auto &model_map : models_) {
    auto model = model_map.second;
    if (!model->is_freed) {
      ir_build.ReleaseModelBuff(*model->model_buffer_data_);
      model->model_buffer_data_ = nullptr;
      model->is_freed = true;
      model->desc_.reset();
      model->desc_ = nullptr;
    }
  }
  models_.clear();
  for (auto client : clients_) {
    client.reset();
  }
  clients_.clear();
}
bool NPUManager::CheckDDKVersion() {
  auto client = std::make_shared<hiai::AiModelMngerClient>();
  if (client->GetVersion() != nullptr) {
@@ -104,54 +124,102 @@ bool NPUManager::IsKirinChip() {
  return false;
}
int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) {
  hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size);
  if (buffer == nullptr) {
    MS_LOG(ERROR) << "MemBuffer is null.";
    return RET_ERROR;
  }
int NPUManager::AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency) {
  auto model = new SubGraphModel(index_, model_name, model_buffer_data, frequency);
  auto desc = std::make_shared<hiai::AiModelDescription>(model_name, frequency, 0, 0, 0);
  desc->SetModelBuffer(buffer->GetMemBufferData(), buffer->GetMemBufferSize());
  model_desc_.push_back(desc);
  mc_builder_->MemBufferDestroy(buffer);
  model_map_.insert({model_name, index_});
  model->desc_ = desc;
  models_.insert({model_name, model});
  index_++;
  return RET_OK;
}
std::shared_ptr<hiai::AiModelMngerClient> NPUManager::CreateAiModelMngerClient() {
  auto client = std::make_shared<hiai::AiModelMngerClient>();
  if (client == nullptr) {
    MS_LOG(ERROR) << "NPU client is nullptr.";
    return nullptr;
  }
  int ret = client->Init(nullptr);
  if (ret != hiai::AI_SUCCESS) {
    MS_LOG(ERROR) << "NPU client init failed. code is " << ret;
    return nullptr;
  }
  return client;
}
int NPUManager::LoadOMModel() {
  for (int i = 0; i < index_ / MAX_MODEL_NUM + 1; i++) {
    auto client = std::make_shared<hiai::AiModelMngerClient>();
    if (client == nullptr) {
      MS_LOG(ERROR) << "NPU client is nullptr.";
      return RET_ERROR;
  std::vector<std::shared_ptr<hiai::AiModelDescription>> models_desc;
  std::shared_ptr<hiai::AiModelMngerClient> client = nullptr;
  std::shared_ptr<hiai::AiModelBuilder> mc_builder = nullptr;
  int total = 0;
  for (const auto &model_map : models_) {
    if (total % MAX_MODEL_NUM == 0) {
      client = CreateAiModelMngerClient();
      if (client == nullptr) {
        MS_LOG(ERROR) << "Create Client failed.";
        return RET_ERROR;
      }
      mc_builder = std::make_shared<hiai::AiModelBuilder>(client);
      if (mc_builder == nullptr) {
        MS_LOG(ERROR) << "Create AiModelBuilder failed.";
        return RET_ERROR;
      }
    }
    int ret = client->Init(nullptr);
    if (ret != hiai::AI_SUCCESS) {
      MS_LOG(ERROR) << "NPU client init failed. code is " << ret;
    total++;
    auto model = model_map.second;
    if (model->is_loaded && model->is_freed) {
      continue;
    }
    models_desc.push_back(model->desc_);
    auto buffer = mc_builder->InputMemBufferCreate(model->model_buffer_data_->data, model->model_buffer_data_->length);
    if (buffer == nullptr) {
      MS_LOG(ERROR) << "NPU input memory buffer create failed.";
      return RET_ERROR;
    }
    mc_builder_ = std::make_shared<hiai::AiModelBuilder>(client);
    model->desc_->SetModelBuffer(model->model_buffer_data_->data, model->model_buffer_data_->length);
    if (models_desc.size() == MAX_MODEL_NUM) {
      auto ret = LoadModel(client, models_desc);
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "Client load model failed.";
        return RET_ERROR;
      }
      models_desc.clear();
    }
  }
  vector<std::shared_ptr<hiai::AiModelDescription>> desc(model_desc_.begin() + i * MAX_MODEL_NUM,
                                                         ((i + 1) * MAX_MODEL_NUM > index_)
                                                             ? model_desc_.begin() + index_
                                                             : model_desc_.begin() + (i + 1) * MAX_MODEL_NUM);
  ret = client->Load(desc);
  if (ret != hiai::AI_SUCCESS) {
    MS_LOG(ERROR) << "Client load model failed." << ret;
  if (!models_desc.empty()) {
    auto ret = LoadModel(client, models_desc);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Client load model failed.";
      return RET_ERROR;
    }
    clients_.push_back(client);
    models_desc.clear();
  }
  return RET_OK;
}
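LoadOMModel caps each AiModelMngerClient at MAX_MODEL_NUM model descriptions: every MAX_MODEL_NUM models it creates a fresh client/builder pair, loads full batches inside the loop, and flushes any partial final batch afterwards. The same chunking pattern in isolation (a sketch; DispatchInChunks and the load callback are hypothetical names, not part of this change):

#include <vector>
// Process items in groups of at most chunk, flushing the partial final group,
// mirroring the MAX_MODEL_NUM batching in LoadOMModel. load() returns 0 on success.
template <typename T, typename Fn>
int DispatchInChunks(const std::vector<T> &items, size_t chunk, Fn load) {
  std::vector<T> pending;
  for (const auto &item : items) {
    pending.push_back(item);
    if (pending.size() == chunk) {
      if (load(pending) != 0) return -1;  // propagate failure immediately
      pending.clear();
    }
  }
  if (!pending.empty() && load(pending) != 0) return -1;  // flush the remainder
  return 0;
}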
std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
  return clients_[model_map_[model_name] / MAX_MODEL_NUM];
  return models_[model_name]->client_;
}
int NPUManager::index() const { return index_; }
int NPUManager::LoadModel(const std::shared_ptr<hiai::AiModelMngerClient> &client,
                          std::vector<std::shared_ptr<hiai::AiModelDescription>> desc_list) {
  auto ret = client->Load(desc_list);
  if (ret != hiai::AI_SUCCESS) {
    MS_LOG(ERROR) << "Client load model failed." << ret;
    return RET_ERROR;
  }
  for (const auto &desc : desc_list) {
    MS_LOG(DEBUG) << desc->GetName();
    auto it = models_.find(desc->GetName());
    it->second->is_loaded = true;
    it->second->client_ = client;
  }
  this->clients_.push_back(client);
  return RET_OK;
}
} // namespace mindspore::lite
@@ -18,9 +18,11 @@
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
#include <string>
#include <memory>
#include <utility>
#include <vector>
#include <unordered_map>
#include <set>
#include "include/hiai_ir_build.h"
#include "schema/model_generated.h"
#include "include/HiAiModelManagerService.h"
@@ -29,17 +31,34 @@ static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
  schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D,
  schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D,
  schema::PrimitiveType_Resize, schema::PrimitiveType_Pooling};
struct SubGraphModel {
 public:
  SubGraphModel(int index, std::string model_name, domi::ModelBufferData *model_buffer_data, int frequency)
      : index_(index), model_name_(std::move(model_name)), model_buffer_data_(model_buffer_data) {}
  bool is_freed = false;
  bool is_loaded = false;
  int index_;
  std::string model_name_;
  domi::ModelBufferData *model_buffer_data_;
  std::shared_ptr<hiai::AiModelMngerClient> client_;
  std::shared_ptr<hiai::AiModelDescription> desc_;
};
class NPUManager {
 public:
  static NPUManager *GetInstance() {
    static NPUManager npuManager;
    return &npuManager;
    static NPUManager manager;
    return &manager;
  }
  ~NPUManager() { Reset(); }
  bool IsSupportNPU();
  // Provided to the subgraph kernel to register a model.
  int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency);
  int AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency);
  // Called by the scheduler to load the OM models.
  int LoadOMModel();
@@ -49,6 +68,11 @@ class NPUManager {
  int index() const;
  void Reset();
  int LoadModel(const std::shared_ptr<hiai::AiModelMngerClient> &client,
                std::vector<std::shared_ptr<hiai::AiModelDescription>> desc_list);
 private:
  bool IsKirinChip();
@@ -58,16 +82,12 @@ class NPUManager {
  int CompareVersion(const std::string &version1, const std::string &version2);
  std::shared_ptr<hiai::AiModelMngerClient> CreateAiModelMngerClient();
 private:
  int index_ = 0;
  std::unordered_map<std::string, SubGraphModel *> models_;
  std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
  std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_;
  std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr;
  std::unordered_map<std::string, int> model_map_;
};
} // namespace mindspore::lite
@@ -13,8 +13,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/npu_add_transform_pass.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
#include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kNPU;
int NPUAddTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@@ -56,9 +56,11 @@ int NPUAddTransformPass::InsertNode(const InnerContext *context, std::vector<ker
  auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
  auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
  all_kernels->push_back(nh2nc_kernel);
  insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());
  auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
  auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
  all_kernels->push_back(nc2nh_kernel);
  insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());
  NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
  NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
  UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
@@ -91,12 +93,11 @@ int NPUAddTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *ker
  return RET_OK;
}
int NPUAddTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                             std::vector<Tensor *> *all_tensors) {
  if (context->IsNpuEnabled()) {
int NPUAddTransformPass::Run() {
  if (context_->IsNpuEnabled()) {
    std::vector<kernel::LiteKernel *> new_kernels;
    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
    for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
      auto kernel = *it;
      new_kernels.push_back(kernel);
      if (kernel->desc().arch != kNPU) {
@@ -110,14 +111,14 @@ int NPUAddTransformPass::Run(const InnerContext *context, std::vector<kernel::Li
        }
      }
      if (kernel->out_kernels().size() != sum) {
        InsertNode(context, it, &new_kernels, all_tensors);
        InsertNode(context_, it, &new_kernels, all_tensors_);
      }
    }
  }
  all_kernels->clear();
  all_kernels_->clear();
  for (int i = 0; i < new_kernels.size(); i++) {
    all_kernels->push_back(new_kernels[i]);
    all_kernels_->push_back(new_kernels[i]);
  }
}
return RET_OK;
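A small aside on the clear-and-refill tail of Run(): clearing all_kernels_ and pushing back each element of new_kernels is equivalent to a single container swap, which avoids the element-by-element copy. A sketch of that idiom, assuming new_kernels has been fully built:

// Equivalent to the clear()/push_back loop above: swap the rewritten kernel
// list into place; new_kernels then holds the old contents and dies with Run().
all_kernels_->swap(new_kernels);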
@@ -14,16 +14,29 @@
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
class NPUAddTransformPass {
class NPUAddTransformPass : public NPUBasePass {
 public:
  int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
          std::vector<Tensor *> *all_tensors);
  explicit NPUAddTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                               std::vector<Tensor *> *all_tensors) {
    context_ = context;
    all_kernels_ = all_kernels;
    all_tensors_ = all_tensors;
    name_ = "NPUAddTransformPass";
  }
  ~NPUAddTransformPass() override {
    for (auto primitive : insert_primitive_) {
      delete primitive;
    }
    insert_primitive_.clear();
  }
  int Run() override;
 private:
  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@@ -37,6 +50,10 @@ class NPUAddTransformPass {
 private:
  int total = 0;
  const InnerContext *context_;
  std::vector<kernel::LiteKernel *> *all_kernels_;
  std::vector<const PrimitiveC *> insert_primitive_;
  std::vector<Tensor *> *all_tensors_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
@@ -0,0 +1,34 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_
#include <string>
namespace mindspore::lite {
class NPUBasePass {
 public:
  virtual int Run() = 0;
  virtual ~NPUBasePass() = default;
  std::string name() { return name_; }
 protected:
  std::string name_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_
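NPUBasePass is the whole contract the pass manager sees: a Run() hook plus a name_ used in error reporting. A toy pass for illustration (the class name and body are hypothetical, not part of this change):

// Hypothetical minimal pass: set name_ so NPUPassManager::Run can report it on
// failure, and return RET_OK (0) from Run() when the pass succeeds.
class NPUNoopPass : public NPUBasePass {
 public:
  NPUNoopPass() { name_ = "NPUNoopPass"; }
  ~NPUNoopPass() override = default;
  int Run() override { return 0; }  // RET_OK
};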
@@ -13,8 +13,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
#include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kNPU;
int NPUConcatTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@@ -54,9 +54,11 @@ int NPUConcatTransformPass::InsertNode(const InnerContext *context, std::vector<
  auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
  auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
  all_kernels->push_back(nh2nc_kernel);
  insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());
  auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
  auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
  all_kernels->push_back(nc2nh_kernel);
  insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());
  NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
  NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
  UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
@@ -90,12 +92,11 @@ int NPUConcatTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *
  return RET_OK;
}
int NPUConcatTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                                std::vector<Tensor *> *all_tensors) {
  if (context->IsNpuEnabled()) {
int NPUConcatTransformPass::Run() {
  if (context_->IsNpuEnabled()) {
    std::vector<kernel::LiteKernel *> new_kernels;
    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
    for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
      auto kernel = *it;
      if (kernel->desc().arch != kNPU) {
        new_kernels.push_back(kernel);
@@ -109,15 +110,15 @@ int NPUConcatTransformPass::Run(const InnerContext *context, std::vector<kernel:
        }
      }
      if (kernel->out_kernels().size() != sum) {
        InsertNode(context, it, &new_kernels, all_tensors);
        InsertNode(context_, it, &new_kernels, all_tensors_);
      }
    }
    new_kernels.push_back(kernel);
  }
  all_kernels->clear();
  all_kernels_->clear();
  for (int i = 0; i < new_kernels.size(); i++) {
    all_kernels->push_back(new_kernels[i]);
    all_kernels_->push_back(new_kernels[i]);
  }
}
return RET_OK;
@@ -14,16 +14,29 @@
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
class NPUConcatTransformPass {
class NPUConcatTransformPass : public NPUBasePass {
 public:
  int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
          std::vector<Tensor *> *all_tensors);
  explicit NPUConcatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                                  std::vector<Tensor *> *all_tensors) {
    context_ = context;
    all_kernels_ = all_kernels;
    all_tensors_ = all_tensors;
    name_ = "NPUConcatTransformPass";
  }
  ~NPUConcatTransformPass() override {
    for (auto primitive : insert_primitive_) {
      delete primitive;
    }
    insert_primitive_.clear();
  }
  int Run() override;
 private:
  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@@ -37,6 +50,10 @@ class NPUConcatTransformPass {
 private:
  int total = 0;
  const InnerContext *context_;
  std::vector<kernel::LiteKernel *> *all_kernels_;
  std::vector<Tensor *> *all_tensors_;
  std::vector<const PrimitiveC *> insert_primitive_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
@@ -13,7 +13,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/npu_fusion_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h"
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/concat_parameter.h"
@@ -198,7 +198,7 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
  return RET_OK;
}
int NPUFusionPass::Fusion() {
int NPUFusionPass::Run() {
  for (auto kernel : *kernels) {
    switch (kernel->Type()) {
      case schema::PrimitiveType_Concat:
@@ -14,17 +14,23 @@
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
class NPUFusionPass {
class NPUFusionPass : public NPUBasePass {
 public:
  explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; }
  ~NPUFusionPass() = default;
  int Fusion();
  explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
    kernels = dst_kernels;
    name_ = "NPUFusionPass";
  }
  ~NPUFusionPass() override = default;
  int Run() override;
 protected:
  int ConcatFusion(kernel::LiteKernel *kernel);
@@ -37,4 +43,4 @@ class NPUFusionPass {
  std::vector<kernel::LiteKernel *> *kernels;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_
@@ -0,0 +1,39 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
namespace mindspore::lite {
void NPUPassManager::AddPass(NPUBasePass *pass) { all_pass_.push_back(pass); }
int NPUPassManager::Run() {
  for (auto pass : all_pass_) {
    auto ret = pass->Run();
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "NPU Pass Run failed. Pass name is:" << pass->name();
      return ret;
    }
  }
  return RET_OK;
}
void NPUPassManager::Clear() {
  for (auto pass : all_pass_) {
    delete pass;
  }
  all_pass_.clear();
}
} // namespace mindspore::lite
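For context, the intended wiring on the scheduler side looks roughly like this (a sketch; the registration order shown is an assumption, not taken from this diff). Note the ownership model: the manager deletes the passes in Clear(), which ~LiteSession() invokes.

// Sketch: register the NPU passes with the singleton manager and run them in
// registration order; ownership transfers to the manager, which deletes them in Clear().
auto pass_manager = NPUPassManager::GetInstance();
pass_manager->AddPass(new NPUTransformPass(context, &kernels, &tensors));
pass_manager->AddPass(new NPUFusionPass(&kernels));
pass_manager->AddPass(new NPUAddTransformPass(context, &kernels, &tensors));
pass_manager->AddPass(new NPUConcatTransformPass(context, &kernels, &tensors));
if (pass_manager->Run() != RET_OK) {
  MS_LOG(ERROR) << "Running NPU passes failed.";
}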
@@ -0,0 +1,41 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_
#include <vector>
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
class NPUPassManager {
 public:
  static NPUPassManager *GetInstance() {
    static NPUPassManager pass_manager;
    return &pass_manager;
  }
  ~NPUPassManager() { Clear(); }
  void AddPass(NPUBasePass *pass);
  int Run();
  void Clear();
 private:
  std::vector<NPUBasePass *> all_pass_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_
@@ -17,7 +17,7 @@
#include "src/kernel_registry.h"
#include "src/ops/nhwc2nchw.h"
#include "src/ops/nchw2nhwc.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
@@ -34,7 +34,7 @@ PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
  }
  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
  if (primitive_buf == nullptr) {
    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
    MS_LOG(ERROR) << "Malloc primitive buffer failed.";
    fbb.Clear();
    return nullptr;
  }
@@ -58,7 +58,7 @@ PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
  }
  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
  if (primitive_buf == nullptr) {
    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
    MS_LOG(ERROR) << "Malloc primitive buffer failed.";
    fbb.Clear();
    return nullptr;
  }
@@ -14,8 +14,8 @@
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
#include <vector>
#include <string>
#include "src/ops/primitive_c.h"
@@ -41,4 +41,4 @@ class NPUPassUtils {
  static PrimitiveC *CreateNhwc2NchwPrimitive();
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
@@ -13,11 +13,11 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/npu_transform_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_transform_pass.h"
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
@@ -77,6 +77,7 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<ker
      NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name);
  // Insert the Nhwc2Nchw kernel in front of the current kernel.
  all_kernels->push_back(pre_trans_kernel);
  insert_primitive_.push_back(pre_trans_kernel->GetPrimitive());
  // Replace the output kernel of the previous node.
  std::vector<kernel::LiteKernel *> pre_trans_in_kernel;
  if (is_input_kernel) {
@@ -99,6 +100,10 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<ke
                                     std::vector<kernel::LiteKernel *> *all_kernels,
                                     std::vector<Tensor *> *all_tensors) {
  auto kernel = *it;
  // Do not insert a transpose after a model output.
  if (kernel->out_kernels().empty()) {
    return RET_OK;
  }
  // A single output may be referenced multiple times.
  for (int i = 0; i < kernel->out_kernels().size(); i++) {
    auto next_kernel = kernel->out_kernels().at(i);
@@ -118,6 +123,7 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<ke
    // Replace the input tensor of the next node.
    NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {next_kernel}, kernel->out_tensors(),
                               post_trans_out_tensors);
    insert_primitive_.push_back(post_trans_kernel->GetPrimitive());
    // Inserting directly at the back does not affect the topological sort.
    all_kernels->push_back(post_trans_kernel);
    UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel);
@@ -171,28 +177,27 @@ int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel
  return RET_OK;
}
int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                                          std::vector<Tensor *> *all_tensors) {
  if (context->IsNpuEnabled()) {
int NPUTransformPass::Run() {
  if (context_->IsNpuEnabled()) {
    std::vector<kernel::LiteKernel *> new_kernels;
    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
    for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
      auto kernel = *it;
      if (kernel->desc().arch != kNPU) {
        new_kernels.push_back(kernel);
        continue;
      }
      if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
        InsertPreNode(context, it, &new_kernels, all_tensors);
        InsertPreNode(context_, it, &new_kernels, all_tensors_);
        new_kernels.push_back(kernel);
        InsertPostNode(context, it, &new_kernels, all_tensors);
        InsertPostNode(context_, it, &new_kernels, all_tensors_);
      } else {
        new_kernels.push_back(kernel);
      }
    }
    all_kernels->clear();
    all_kernels_->clear();
    for (int i = 0; i < new_kernels.size(); i++) {
      all_kernels->push_back(new_kernels[i]);
      all_kernels_->push_back(new_kernels[i]);
    }
  }
  return RET_OK;
@@ -14,16 +14,30 @@
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
class NPUTransformPass {
class NPUTransformPass : public NPUBasePass {
 public:
  int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                          std::vector<Tensor *> *all_tensors);
  int Run() override;
  explicit NPUTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                            std::vector<Tensor *> *all_tensors) {
    context_ = context;
    all_kernels_ = all_kernels;
    all_tensors_ = all_tensors;
    name_ = "NPUTransformPass";
  }
  ~NPUTransformPass() override {
    for (auto primitive : insert_primitive_) {
      delete primitive;
    }
    insert_primitive_.clear();
  }
 private:
  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@@ -46,6 +60,10 @@ class NPUTransformPass {
 private:
  int total = 0;
  const InnerContext *context_;
  std::vector<kernel::LiteKernel *> *all_kernels_;
  std::vector<Tensor *> *all_tensors_;
  std::vector<const PrimitiveC *> insert_primitive_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
@@ -70,7 +70,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
}
int SubGraphNpuKernel::Run() {
  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, nodes_);
  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, out_kernels_, nodes_);
}
int SubGraphNpuKernel::BuildNPUInputOp() {
@@ -159,17 +159,17 @@ int SubGraphNpuKernel::BuildNPUOutputOp() {
std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }
int SubGraphNpuKernel::Init() {
  if (!isCompiled_) {
    model_buffer_data_ = BuildIRModel();
    if (model_buffer_data_ == nullptr) {
  if (!is_compiled_) {
    auto model_buffer_data = BuildIRModel();
    if (model_buffer_data == nullptr) {
      MS_LOG(ERROR) << "Build IR model failed.";
      return RET_ERROR;
    }
    name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
    mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data_->data, model_buffer_data_->length,
                                                         GetOMModelName(), context_->GetNpuInfo().frequency_);
    mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data, GetOMModelName(),
                                                         context_->GetNpuInfo().frequency_);
    executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName());
@@ -177,7 +177,7 @@ int SubGraphNpuKernel::Init() {
      MS_LOG(ERROR) << "Create NPUExecutor failed.";
      return RET_ERROR;
    }
    isCompiled_ = true;
    is_compiled_ = true;
  }
  return RET_OK;
}
@@ -69,9 +69,7 @@ class SubGraphNpuKernel : public SubGraphKernel {
  std::string GetOMModelName();
 private:
  bool isCompiled_ = false;
  domi::ModelBufferData *model_buffer_data_;
  bool is_compiled_ = false;
  std::vector<ge::Operator> subgraph_input_op_;
@@ -42,21 +42,14 @@ using mindspore::schema::PrimitiveType_Sub;
namespace mindspore::kernel {
int ArithmeticNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                   const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
  if (primitive_->Type() == PrimitiveType_Mul || primitive_->Type() == PrimitiveType_Div) {
  if (primitive_->Type() == PrimitiveType_Mul || primitive_->Type() == PrimitiveType_Div ||
      primitive_->Type() == PrimitiveType_Add || primitive_->Type() == PrimitiveType_Sub) {
    if (inputs[0]->shape() != inputs[1]->shape()) {
      MS_LOG(WARNING) << "For the two inputs, the corresponding dimensions must have the same value."
      MS_LOG(WARNING) << name_ << " for the two inputs, the corresponding dimensions must have the same value."
                      << " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
      return RET_ERROR;
    }
  }
  if (primitive_->Type() == PrimitiveType_Add || primitive_->Type() == PrimitiveType_Sub) {
    if (inputs[0]->shape().size() != inputs[1]->shape().size()) {
      MS_LOG(WARNING)
          << "For the two inputs, the corresponding dimensions must have the same value, or one of them is 1."
          << " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
      return RET_ERROR;
    }
  }
  return RET_OK;
}
@@ -35,8 +35,8 @@ int CastNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
    return RET_ERROR;
  }
  op_->set_input_x(*npu_inputs[0]);
  op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(dst_type_));
  op_->set_attr_src_dtype(lite::ConverterToNPUDataType(src_type_));
  op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->dst_type_)));
  op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->src_type_)));
  return RET_OK;
}
@@ -27,9 +27,7 @@ class CastNPUKernel : public NPUKernel {
              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
              const mindspore::lite::PrimitiveC *primitive)
      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
    auto cast_parameter = reinterpret_cast<CastParameter *>(parameter);
    dst_type_ = static_cast<TypeId>(cast_parameter->dst_type_);
    src_type_ = static_cast<TypeId>(cast_parameter->src_type_);
    cast_parameter_ = reinterpret_cast<CastParameter *>(parameter);
  }
  ~CastNPUKernel() override;
@@ -41,8 +39,7 @@ class CastNPUKernel : public NPUKernel {
 private:
  hiai::op::CastT *op_ = nullptr;
  TypeId dst_type_;
  TypeId src_type_;
  CastParameter *cast_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CAST_NPU_H_
@@ -18,8 +18,8 @@
#include <vector>
#include <memory>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/all_ops.h"
#include "src/runtime/kernel/npu/transpose_base_npu.h"
#include "nnacl/conv_parameter.h"
namespace mindspore::kernel {
@@ -30,7 +30,7 @@ int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
int ConvolutionNPUKernel::SetConvParam() {
  conv_->set_attr_strides(ge::AttrValue::LIST_INT({conv_param_->stride_h_, conv_param_->stride_w_}));
  conv_->set_attr_dilations(ge::AttrValue::LIST_INT({conv_param_->dilation_h_, conv_param_->dilation_w_}));
  conv_->set_attr_groups(1);
  conv_->set_attr_groups(conv_param_->group_);
  if (conv_param_->pad_mode_ == Pad_Same) {
    conv_->set_attr_pad_mode(ge::AttrValue::STR{"SAME"});
@@ -41,7 +41,7 @@ int GatherNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
  op_->set_input_x(*npu_inputs[0]);
  op_->set_input_indices(*npu_inputs[1]);
  op_->set_attr_axis(axis_);
  op_->set_attr_axis(gather_parameter_->axis_);
  return RET_OK;
}
@@ -27,8 +27,7 @@ class GatherNPUKernel : public NPUKernel {
              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
              const mindspore::lite::PrimitiveC *primitive)
      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
    auto gather_parameter = reinterpret_cast<GatherParameter *>(parameter);
    axis_ = gather_parameter->axis_;
    gather_parameter_ = reinterpret_cast<GatherParameter *>(parameter);
  }
  ~GatherNPUKernel() override;
@@ -40,7 +39,7 @@ class GatherNPUKernel : public NPUKernel {
 private:
  hiai::op::GatherV2D *op_ = nullptr;
  int axis_;
  GatherParameter *gather_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_GATHER_NPU_H_
@@ -33,8 +33,8 @@ int MatMulNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
  op_->set_input_x1(*npu_inputs[0]);
  op_->set_input_x2(*npu_inputs[1]);
  op_->set_attr_transpose_x1(a_transpose_);
  op_->set_attr_transpose_x2(b_transpose_);
  op_->set_attr_transpose_x1(matmul_parameter_->a_transpose_);
  op_->set_attr_transpose_x2(matmul_parameter_->b_transpose_);
  return RET_OK;
}
@@ -28,9 +28,7 @@ class MatMulNPUKernel : public NPUKernel {
              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
              const mindspore::lite::PrimitiveC *primitive)
      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
    auto matmul_parameter = reinterpret_cast<MatMulParameter *>(parameter);
    a_transpose_ = matmul_parameter->a_transpose_;
    b_transpose_ = matmul_parameter->b_transpose_;
    matmul_parameter_ = reinterpret_cast<MatMulParameter *>(parameter);
  }
  ~MatMulNPUKernel() override;
@@ -42,8 +40,7 @@ class MatMulNPUKernel : public NPUKernel {
 private:
  hiai::op::MatMul *op_ = nullptr;
  bool a_transpose_ = false;
  bool b_transpose_ = false;
  MatMulParameter *matmul_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_MATMUL_NPU_H_
@@ -53,6 +53,11 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                     const lite::InnerContext *ctx, const kernel::KernelKey &desc,
                                     const mindspore::lite::PrimitiveC *primitive) {
  if (!primitive->infer_flag()) {
    MS_LOG(ERROR) << "NPU does not support runtime shape inference.";
    return nullptr;
  }
  auto *kernel = new (std::nothrow) T(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel " << opParameter->name_ << " is nullptr.";
@@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_Pad;
namespace mindspore::kernel {
int PadNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                            OpParameter *opParameter) {
  if (padding_mode_ != schema::PaddingMode_CONSTANT) {
  if (pad_->GetPaddingMode() != schema::PaddingMode_CONSTANT) {
    MS_LOG(WARNING) << "NPU only support CONSTANT padding mode";
    return RET_ERROR;
  }
@@ -39,16 +39,16 @@ int PadNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
    MS_LOG(ERROR) << name_ << " op is nullptr";
    return RET_ERROR;
  }
  int size = static_cast<int>(paddings_.size() / 2);
  int size = static_cast<int>(pad_->GetPaddings().size() / 2);
  ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32);
  ge::TensorPtr padding_tensor = std::make_shared<hiai::Tensor>(padding_tensor_desc);
  padding_tensor->SetData(reinterpret_cast<uint8_t *>(paddings_.data()), size * sizeof(int));
  padding_tensor->SetData(reinterpret_cast<uint8_t *>(pad_->GetPaddings().data()), size * sizeof(int));
  auto paddings = new hiai::op::Const(name_ + "paddings");
  paddings->set_attr_value(padding_tensor);
  ge::TensorDesc constant_values_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
  ge::TensorPtr constant_values_tensor = std::make_shared<hiai::Tensor>(constant_values_tensor_desc);
  vector<float> constant_values_data_value = {constant_value_};
  vector<float> constant_values_data_value = {pad_->GetConstantValue()};
  constant_values_tensor->SetData(reinterpret_cast<uint8_t *>(constant_values_data_value.data()), 1 * sizeof(float));
  auto constant = new hiai::op::Const(name_ + "constant");
  constant->set_attr_value(constant_values_tensor);
@@ -28,10 +28,7 @@ class PadNPUKernel : public NPUKernel {
              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
              const mindspore::lite::PrimitiveC *primitive)
      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
    auto pad = reinterpret_cast<const mindspore::lite::Pad *>(primitive);
    constant_value_ = pad->GetConstantValue();
    paddings_ = pad->GetPaddings();
    padding_mode_ = pad->GetPaddingMode();
    pad_ = reinterpret_cast<const mindspore::lite::Pad *>(primitive);
  }
  ~PadNPUKernel() override;
@@ -43,9 +40,7 @@ class PadNPUKernel : public NPUKernel {
 private:
  hiai::op::PadV2 *op_ = nullptr;
  std::vector<int> paddings_;
  int padding_mode_;
  float constant_value_;
  const mindspore::lite::Pad *pad_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_PAD_NPU_H_
@@ -27,8 +27,9 @@ using mindspore::schema::PrimitiveType_Resize;
namespace mindspore::kernel {
int ResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                               OpParameter *opParameter) {
  if (method_ != schema::ResizeMethod_LINEAR || method_ == schema::ResizeMethod_NEAREST) {
    MS_LOG(WARNING) << "Unsupported resize method type:" << method_;
  if (resize_parameter_->method_ != schema::ResizeMethod_LINEAR &&
      resize_parameter_->method_ != schema::ResizeMethod_NEAREST) {
    MS_LOG(WARNING) << "Unsupported resize method type:" << resize_parameter_->method_;
    return RET_ERROR;
  }
  return RET_OK;
@@ -38,20 +39,21 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
                                  const std::vector<ge::Operator *> &npu_inputs) {
  ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32);
  ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc);
  vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)};
  vector<int32_t> dataValue = {static_cast<int32_t>(resize_parameter_->new_height_),
                               static_cast<int32_t>(resize_parameter_->new_width_)};
  sizeTensor->SetData(reinterpret_cast<uint8_t *>(dataValue.data()), 2 * sizeof(int32_t));
  auto out_size = new (std::nothrow) hiai::op::Const(name_ + "_size");
  out_size->set_attr_value(sizeTensor);
  if (method_ == schema::ResizeMethod_LINEAR) {
  if (resize_parameter_->method_ == schema::ResizeMethod_LINEAR) {
    auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_);
    if (op == nullptr) {
      MS_LOG(ERROR) << " op is nullptr.";
      return RET_ERROR;
    }
    op->set_attr_align_corners(align_corners_);
    op->set_attr_align_corners(resize_parameter_->align_corners_);
    op->set_input_x(*npu_inputs[0]);
    op->set_input_size(*out_size);
    op->set_attr_half_pixel_centers(preserve_aspect_ratio_);
    op->set_attr_half_pixel_centers(resize_parameter_->preserve_aspect_ratio_);
    op_ = op;
  } else {
    auto op = new (std::nothrow) hiai::op::ResizeNearestNeighborV2(name_);
@@ -59,7 +61,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
      MS_LOG(ERROR) << " op is nullptr.";
      return RET_ERROR;
    }
    op->set_attr_align_corners(align_corners_);
    op->set_attr_align_corners(resize_parameter_->align_corners_);
    op->set_input_x(*npu_inputs[0]);
    op->set_input_size(*out_size);
    op_ = op;
@@ -22,7 +22,6 @@
#include "nnacl/arithmetic_common.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/all_ops.h"
#include "src/runtime/kernel/npu/transpose_base_npu.h"
namespace mindspore::kernel {
class ResizeNPUKernel : public NPUKernel {
 public:
@@ -30,12 +29,7 @@ class ResizeNPUKernel : public NPUKernel {
              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
              const mindspore::lite::PrimitiveC *primitive)
      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
    auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter);
    method_ = resize_parameter->method_;
    new_height_ = resize_parameter->new_height_;
    new_width_ = resize_parameter->new_width_;
    align_corners_ = resize_parameter->align_corners_;
    preserve_aspect_ratio_ = resize_parameter->preserve_aspect_ratio_;
    resize_parameter_ = reinterpret_cast<ResizeParameter *>(parameter);
  }
  ~ResizeNPUKernel() override;
@@ -48,11 +42,7 @@ class ResizeNPUKernel : public NPUKernel {
 private:
  ge::Operator *op_ = nullptr;
  int method_;
  int64_t new_height_;
  int64_t new_width_;
  bool align_corners_;
  bool preserve_aspect_ratio_;
  ResizeParameter *resize_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_RESIZE_NPU_H_
@@ -34,7 +34,7 @@ int ScaleNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
    MS_LOG(ERROR) << name_ << " op is nullptr";
    return RET_ERROR;
  }
  op_->set_attr_axis(this->axis_);
  op_->set_attr_axis(scale_parameter_->axis_);
  op_->set_input_x(*npu_inputs[0]);
  op_->set_input_scale(*npu_inputs[1]);
  op_->set_input_bias(*npu_inputs[2]);
@@ -27,7 +27,7 @@ class ScaleNPUKernel : public NPUKernel {
              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
              const mindspore::lite::PrimitiveC *primitive)
      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
    axis_ = reinterpret_cast<ScaleParameter *>(parameter)->axis_;
    scale_parameter_ = reinterpret_cast<ScaleParameter *>(parameter);
  }
  ~ScaleNPUKernel() override;
@@ -39,7 +39,7 @@ class ScaleNPUKernel : public NPUKernel {
 private:
  hiai::op::Scale *op_ = nullptr;
  int axis_;
  ScaleParameter *scale_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_Scale_NPU_H_
@@ -35,10 +35,10 @@ int SoftmaxNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
    MS_LOG(ERROR) << name_ << " op is nullptr";
    return RET_ERROR;
  }
  if (axis_ == -1) {
  if (softmax_parameter_->axis_ == -1) {
    op_->set_attr_axis(inputs[0]->shape().size() - 1);
  } else {
    op_->set_attr_axis(axis_);
    op_->set_attr_axis(softmax_parameter_->axis_);
  }
  op_->set_input_x(*npu_inputs[0]);
  return RET_OK;
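The axis handling above only needs the -1 special case, but the general rule for mapping a negative axis onto HiAI's non-negative attribute is worth spelling out (a sketch; NormalizeAxis is a hypothetical helper, not part of this change):

// Map a possibly negative axis to its non-negative equivalent for a tensor of
// the given rank, e.g. axis -1 on a rank-4 tensor becomes 3.
int NormalizeAxis(int axis, int rank) { return axis < 0 ? axis + rank : axis; }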
| @@ -27,8 +27,7 @@ class SoftmaxNPUKernel : public NPUKernel { | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| auto softmax_parameter = reinterpret_cast<SoftmaxParameter *>(parameter); | |||
| axis_ = softmax_parameter->axis_; | |||
| softmax_parameter_ = reinterpret_cast<SoftmaxParameter *>(parameter); | |||
| } | |||
| ~SoftmaxNPUKernel() override; | |||
| @@ -40,7 +39,7 @@ class SoftmaxNPUKernel : public NPUKernel { | |||
| private: | |||
| hiai::op::Softmax *op_ = nullptr; | |||
| int axis_; | |||
| SoftmaxParameter *softmax_parameter_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SOFTMAX_NPU_H_ | |||
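One detail worth calling out in the Softmax change: an `axis_` of -1 is normalized to the last dimension before being handed to HiAI, while any other value passes through unchanged. A self-contained worked example of that branch:

```cpp
// For a rank-3 input of shape {2, 3, 4}, axis_ == -1 resolves to axis 2.
#include <cassert>
#include <vector>

int ResolveSoftmaxAxis(int axis, const std::vector<int> &shape) {
  // Mirrors the branch in SoftmaxNPUKernel::SetNPUInputs: only -1 is
  // normalized; other values are passed through unchanged.
  return axis == -1 ? static_cast<int>(shape.size()) - 1 : axis;
}

int main() {
  assert(ResolveSoftmaxAxis(-1, {2, 3, 4}) == 2);
  assert(ResolveSoftmaxAxis(1, {2, 3, 4}) == 1);
  return 0;
}
```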
| @@ -35,25 +35,25 @@ int SplitNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons | |||
| MS_LOG(ERROR) << name_ << " op is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| int size = size_splits_.size(); | |||
| int size = split_->size_splits().size(); | |||
| ge::TensorDesc size_splits_tensor_desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32); | |||
| ge::TensorPtr size_splits_tensor = std::make_shared<hiai::Tensor>(size_splits_tensor_desc); | |||
| size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(size_splits_.data()), size * sizeof(int)); | |||
| size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(split_->size_splits().data()), size * sizeof(int)); | |||
| auto size_splits = new hiai::op::Const(name_ + "_size"); | |||
| size_splits->set_attr_value(size_splits_tensor); | |||
| ge::TensorDesc split_dim_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32); | |||
| ge::TensorPtr split_dim_tensor = std::make_shared<hiai::Tensor>(split_dim_tensor_desc); | |||
| vector<int32_t> split_dim_data_value = {split_dim_}; | |||
| vector<int32_t> split_dim_data_value = {split_->GetSplitDim()}; | |||
| split_dim_tensor->SetData(reinterpret_cast<uint8_t *>(split_dim_data_value.data()), 1 * sizeof(int)); | |||
| auto split_dim = new hiai::op::Const(name_ + "_dim"); | |||
| split_dim->set_attr_value(split_dim_tensor); | |||
| op_->set_input_x(*npu_inputs[0]); | |||
| op_->set_attr_num_split(num_split_); | |||
| op_->set_attr_num_split(split_->GetNumberSplit()); | |||
| op_->set_input_split_dim(*split_dim); | |||
| op_->set_input_size_splits(*size_splits); | |||
| op_->create_dynamic_output_y(num_split_); | |||
| op_->create_dynamic_output_y(split_->GetNumberSplit()); | |||
| return RET_OK; | |||
| } | |||
| @@ -27,10 +27,7 @@ class SplitNPUKernel : public NPUKernel { | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| auto split = reinterpret_cast<const mindspore::lite::Split *>(primitive); | |||
| num_split_ = split->GetNumberSplit(); | |||
| size_splits_ = split->GetSizeSplit(); | |||
| split_dim_ = split->GetSplitDim(); | |||
| split_ = reinterpret_cast<const mindspore::lite::Split *>(primitive); | |||
| } | |||
| ~SplitNPUKernel() override; | |||
| @@ -42,9 +39,7 @@ class SplitNPUKernel : public NPUKernel { | |||
| private: | |||
| hiai::op::SplitV *op_ = nullptr; | |||
| int num_split_; | |||
| std::vector<int> size_splits_; | |||
| int split_dim_; | |||
| const mindspore::lite::Split *split_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SPLIT_NPU_H_ | |||
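Editor's note on the Split hunk: `split_->size_splits().data()` is safe only as long as `SetData` consumes the buffer within the same full expression. If the getter returns by reference this is always fine, and if HiAI copies the bytes synchronously a temporary also survives long enough, but if `size_splits()` returns by value the pattern is fragile; caching the vector in a named local sidesteps the question. A self-contained illustration of the lifetime issue, with `int` standing in for the tensor data:

```cpp
// Taking .data() on a temporary returned by value leaves a dangling pointer
// once the full expression ends, if the callee stores it instead of copying.
#include <vector>

std::vector<int> MakeSizes() { return {2, 3}; }  // returns by value

int main() {
  // Risky if the pointer outlives the statement:
  // const int *p = MakeSizes().data();   // dangles after this line
  // Safe: extend the lifetime with a named local first.
  std::vector<int> sizes = MakeSizes();
  const int *p = sizes.data();  // valid as long as `sizes` is alive
  return p[0] == 2 ? 0 : 1;
}
```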
| @@ -59,11 +59,11 @@ int StridedSliceNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input | |||
| } else { | |||
| op_->set_input_strides(*npu_inputs[3]); | |||
| } | |||
| op_->set_attr_begin_mask(begin_mask_); | |||
| op_->set_attr_ellipsis_mask(ellipsis_mask_); | |||
| op_->set_attr_end_mask(end_mask_); | |||
| op_->set_attr_shrink_axis_mask(shrink_axis_mask_); | |||
| op_->set_attr_new_axis_mask(new_axis_mask_); | |||
| op_->set_attr_begin_mask(strided_slice_->GetBeginMask()); | |||
| op_->set_attr_ellipsis_mask(strided_slice_->GetEllipsisMask()); | |||
| op_->set_attr_end_mask(strided_slice_->GetEndMask()); | |||
| op_->set_attr_shrink_axis_mask(strided_slice_->GetShrinkAxisMask()); | |||
| op_->set_attr_new_axis_mask(strided_slice_->GetNewAxisMask()); | |||
| return RET_OK; | |||
| } | |||
| @@ -28,12 +28,7 @@ class StridedSliceNPUKernel : public NPUKernel { | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| auto strided_slice = reinterpret_cast<const mindspore::lite::StridedSlice *>(primitive); | |||
| begin_mask_ = strided_slice->GetBeginMask(); | |||
| end_mask_ = strided_slice->GetEndMask(); | |||
| ellipsis_mask_ = strided_slice->GetEllipsisMask(); | |||
| new_axis_mask_ = strided_slice->GetNewAxisMask(); | |||
| shrink_axis_mask_ = strided_slice->GetShrinkAxisMask(); | |||
| strided_slice_ = reinterpret_cast<const mindspore::lite::StridedSlice *>(primitive); | |||
| } | |||
| ~StridedSliceNPUKernel() override; | |||
| @@ -45,11 +40,7 @@ class StridedSliceNPUKernel : public NPUKernel { | |||
| private: | |||
| hiai::op::StridedSlice *op_ = nullptr; | |||
| int begin_mask_; | |||
| int end_mask_; | |||
| int ellipsis_mask_; | |||
| int new_axis_mask_; | |||
| int shrink_axis_mask_; | |||
| const mindspore::lite::StridedSlice *strided_slice_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_STRIDEDSLICE_NPU_H_ | |||
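The five StridedSlice attributes above are per-dimension bitmasks. Assuming HiAI follows the TensorFlow StridedSlice convention that these getters mirror, bit i of `begin_mask` set means `begins[i]` is ignored and dimension i starts from the beginning of its range; `end_mask`, `ellipsis_mask`, `new_axis_mask`, and `shrink_axis_mask` work analogously. A small self-contained check of the bit arithmetic:

```cpp
// Sketch of the (TensorFlow-style) mask semantics the attributes encode.
#include <cassert>

bool DimIgnoresBegin(int begin_mask, int dim) { return (begin_mask >> dim) & 1; }

int main() {
  int begin_mask = 0b101;  // dimensions 0 and 2 use full-range starts
  assert(DimIgnoresBegin(begin_mask, 0));
  assert(!DimIgnoresBegin(begin_mask, 1));
  assert(DimIgnoresBegin(begin_mask, 2));
  return 0;
}
```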
| @@ -1,54 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/npu/transpose_base_npu.h" | |||
| namespace mindspore::kernel { | |||
| TransposeBaseNPUKernel::~TransposeBaseNPUKernel() { | |||
| if (pre_trans_ != nullptr) { | |||
| delete pre_trans_; | |||
| pre_trans_ = nullptr; | |||
| } | |||
| if (post_trans_ != nullptr) { | |||
| delete post_trans_; | |||
| post_trans_ = nullptr; | |||
| } | |||
| } | |||
| int TransposeBaseNPUKernel::SetPreTranspose(const ge::Operator *input) { | |||
| // input permute: NHWC -> NCHW | |||
| pre_trans_ = new (std::nothrow) hiai::op::Permute(name_ + "_pre_transpose"); | |||
| if (pre_trans_ == nullptr) { | |||
| MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| pre_trans_->set_input_x(*input); | |||
| pre_trans_->set_attr_order(ge::AttrValue::LIST_INT({0, 3, 1, 2})); | |||
| return RET_OK; | |||
| } | |||
| int TransposeBaseNPUKernel::SetPostTranspose(const ge::Operator *input) { | |||
| // permute: NCHW -> NHWC | |||
| post_trans_ = new (std::nothrow) hiai::op::Permute(name_ + "_post_transpose"); | |||
| if (post_trans_ == nullptr) { | |||
| MS_LOG(ERROR) << "New post transpose operator (NCHW -> NHWC) for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| post_trans_->set_input_x(*input); | |||
| post_trans_->set_attr_order(ge::AttrValue::LIST_INT({0, 2, 3, 1})); | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::kernel | |||
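The deleted `TransposeBaseNPUKernel` wrapped individual ops in explicit Permute nodes: NHWC to NCHW with order {0, 3, 1, 2} on the way in, NCHW to NHWC with order {0, 2, 3, 1} on the way out. Those two orders are inverses, which is presumably what lets the new graph-level transform and fusion passes hoist the wrapping out of each kernel and cancel redundant back-to-back pairs. A self-contained check that the orders round-trip:

```cpp
#include <cassert>
#include <vector>

// Apply a permutation order to a shape, as a Permute op would.
std::vector<int> Permute(const std::vector<int> &shape, const std::vector<int> &order) {
  std::vector<int> out(shape.size());
  for (size_t i = 0; i < order.size(); ++i) out[i] = shape[order[i]];
  return out;
}

int main() {
  std::vector<int> nhwc = {1, 224, 224, 3};
  std::vector<int> nchw = Permute(nhwc, {0, 3, 1, 2});  // pre-transpose
  assert((nchw == std::vector<int>{1, 3, 224, 224}));
  assert(Permute(nchw, {0, 2, 3, 1}) == nhwc);          // post-transpose undoes it
  return 0;
}
```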
| @@ -1,41 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_ | |||
| #include <vector> | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "include/graph/compatible/all_ops.h" | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "nnacl/op_base.h" | |||
| namespace mindspore::kernel { | |||
| class TransposeBaseNPUKernel : public NPUKernel { | |||
| public: | |||
| TransposeBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~TransposeBaseNPUKernel() override; | |||
| protected: | |||
| int SetPreTranspose(const ge::Operator *input); | |||
| int SetPostTranspose(const ge::Operator *input); | |||
| hiai::op::Permute *pre_trans_ = nullptr; | |||
| hiai::op::Permute *post_trans_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_ | |||
| @@ -30,7 +30,7 @@ int TransposeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, con | |||
| MS_LOG(ERROR) << "Unsupported conjugate transpose."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| return RET_ERROR; | |||
| } | |||
| int TransposeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -57,6 +57,4 @@ TransposeNPUKernel::~TransposeNPUKernel() { | |||
| } | |||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Transpose, NPUKernelCreator<TransposeNPUKernel>) | |||
| // REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Nhwc2Nchw, NPUKernelCreator<TransposeNPUKernel>) | |||
| // REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Nchw2Nhwc, NPUKernelCreator<TransposeNPUKernel>) | |||
| } // namespace mindspore::kernel | |||
| @@ -34,10 +34,11 @@ | |||
| #if SUPPORT_NPU | |||
| #include "src/runtime/agent/npu/subgraph_npu_kernel.h" | |||
| #include "src/runtime/agent/npu/npu_manager.h" | |||
| #include "src/runtime/agent/npu/npu_transform_pass.h" | |||
| #include "src/runtime/agent/npu/npu_fusion_pass.h" | |||
| #include "src/runtime/agent/npu/npu_add_transform_pass.h" | |||
| #include "src/runtime/agent/npu/npu_concat_transform_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_pass_manager.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_transform_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h" | |||
| #endif | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kCPU; | |||
| @@ -89,12 +90,12 @@ void Scheduler::FindNodeInoutTensors(const lite::Model::Node &node, std::vector< | |||
| auto in_size = node.input_indices_.size(); | |||
| inputs->reserve(in_size); | |||
| for (size_t j = 0; j < in_size; ++j) { | |||
| inputs->emplace_back(src_tensors_.at(node.input_indices_[j])); | |||
| inputs->emplace_back(src_tensors_->at(node.input_indices_[j])); | |||
| } | |||
| auto out_size = node.output_indices_.size(); | |||
| outputs->reserve(out_size); | |||
| for (size_t j = 0; j < out_size; ++j) { | |||
| outputs->emplace_back(src_tensors_.at(node.output_indices_[j])); | |||
| outputs->emplace_back(src_tensors_->at(node.output_indices_[j])); | |||
| } | |||
| } | |||
| @@ -303,11 +304,11 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kern | |||
| } | |||
| if (in_tensors != nullptr) { | |||
| std::transform(subgraph->input_indices_.begin(), subgraph->input_indices_.end(), std::back_inserter(*in_tensors), | |||
| [&](const uint32_t index) { return this->src_tensors_.at(index); }); | |||
| [&](const uint32_t index) { return this->src_tensors_->at(index); }); | |||
| } | |||
| if (out_tensors != nullptr) { | |||
| std::transform(subgraph->output_indices_.begin(), subgraph->output_indices_.end(), std::back_inserter(*out_tensors), | |||
| [&](const uint32_t index) { return this->src_tensors_.at(index); }); | |||
| [&](const uint32_t index) { return this->src_tensors_->at(index); }); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -567,37 +568,16 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker | |||
| int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) { | |||
| int ret = RET_OK; | |||
| #if SUPPORT_NPU | |||
| auto transform_pass = new NPUTransformPass; | |||
| ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_); | |||
| delete transform_pass; | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Run npu format transform pass failed."; | |||
| return ret; | |||
| } | |||
| auto add_format_pass = new NPUAddTransformPass; | |||
| ret = add_format_pass->Run(context_, dst_kernels, &src_tensors_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Run npu add op insert transform pass failed."; | |||
| return ret; | |||
| } | |||
| delete add_format_pass; | |||
| auto concat_format_pass = new NPUConcatTransformPass; | |||
| ret = concat_format_pass->Run(context_, dst_kernels, &src_tensors_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Run npu concat op insert transform pass failed."; | |||
| return ret; | |||
| } | |||
| delete concat_format_pass; | |||
| auto transform_pass = new NPUTransformPass(context_, dst_kernels, src_tensors_); | |||
| mindspore::lite::NPUPassManager::GetInstance()->AddPass(transform_pass); | |||
| auto add_format_pass = new NPUAddTransformPass(context_, dst_kernels, src_tensors_); | |||
| mindspore::lite::NPUPassManager::GetInstance()->AddPass(add_format_pass); | |||
| auto concat_format_pass = new NPUConcatTransformPass(context_, dst_kernels, src_tensors_); | |||
| mindspore::lite::NPUPassManager::GetInstance()->AddPass(concat_format_pass); | |||
| auto fusion_pass = new NPUFusionPass(dst_kernels); | |||
| ret = fusion_pass->Fusion(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Run npu fussion transform pass failed."; | |||
| return ret; | |||
| } | |||
| delete fusion_pass; | |||
| mindspore::lite::NPUPassManager::GetInstance()->AddPass(fusion_pass); | |||
| ret = mindspore::lite::NPUPassManager::GetInstance()->Run(); | |||
| #endif | |||
| return ret; | |||
| } | |||
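RunPass no longer executes and deletes each pass inline; it registers the passes with the `NPUPassManager` singleton and triggers a single `Run()`. That also explains why the raw `new NPUTransformPass(...)` allocations lose their matching `delete`s here: ownership presumably moves to the manager, which reclaims the passes in `Clear()`. A minimal sketch of such a manager; `NPUPass` is a hypothetical base class, and the real interface under src/runtime/agent/npu/optimizer/ may differ:

```cpp
#include <vector>

class NPUPass {
 public:
  virtual ~NPUPass() = default;
  virtual int Run() = 0;  // returns RET_OK (0) on success
};

class NPUPassManager {
 public:
  static NPUPassManager *GetInstance() {
    static NPUPassManager instance;  // process-wide singleton
    return &instance;
  }
  void AddPass(NPUPass *pass) { passes_.push_back(pass); }
  int Run() {
    for (auto pass : passes_) {
      int ret = pass->Run();
      if (ret != 0) return ret;  // stop at the first failing pass
    }
    return 0;
  }
  void Clear() {  // the manager owns the registered passes
    for (auto pass : passes_) delete pass;
    passes_.clear();
  }

 private:
  std::vector<NPUPass *> passes_;
};
```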
| @@ -28,8 +28,8 @@ | |||
| namespace mindspore::lite { | |||
| class Scheduler { | |||
| public: | |||
| Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> src_tensors) | |||
| : context_(ctx), src_model_(src_model), src_tensors_(std::move(src_tensors)) {} | |||
| Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors) | |||
| : context_(ctx), src_model_(src_model), src_tensors_(src_tensors) {} | |||
| ~Scheduler() = default; | |||
| int Schedule(std::vector<kernel::LiteKernel *> *dst_kernels); | |||
| @@ -85,7 +85,7 @@ class Scheduler { | |||
| protected: | |||
| const InnerContext *context_ = nullptr; | |||
| Model *src_model_ = nullptr; | |||
| std::vector<Tensor *> src_tensors_; | |||
| std::vector<Tensor *> *src_tensors_; | |||
| std::vector<size_t> graph_output_node_indexes_; | |||
| }; | |||
| } // namespace mindspore::lite | |||
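The constructor change from taking `std::vector<Tensor *>` by value to taking a pointer is load-bearing: the NPU passes create new tensors for the inserted transposes and append them to this list, and with a copied vector those insertions would never reach the session that owns, and eventually frees, the tensors. A self-contained illustration, with `int` standing in for `Tensor *`:

```cpp
#include <cassert>
#include <vector>

void PassOverCopy(std::vector<int> tensors) { tensors.push_back(42); }
void PassOverPointer(std::vector<int> *tensors) { tensors->push_back(42); }

int main() {
  std::vector<int> session_tensors = {1, 2, 3};
  PassOverCopy(session_tensors);      // insertion is lost with a copy
  assert(session_tensors.size() == 3);
  PassOverPointer(&session_tensors);  // insertion reaches the owner
  assert(session_tensors.size() == 4);
  return 0;
}
```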