| @@ -95,6 +95,8 @@ class LiteKernel { | |||||
| virtual int Init() { return mindspore::lite::RET_ERROR; } | virtual int Init() { return mindspore::lite::RET_ERROR; } | ||||
| OpParameter *op_parameter() { return op_parameter_; } | |||||
| std::string name() const { return this->name_; } | std::string name() const { return this->name_; } | ||||
| virtual int Train() { | virtual int Train() { | ||||
| @@ -479,12 +479,6 @@ int LiteSession::Init(const Context *context) { | |||||
| is_running_.store(false); | is_running_.store(false); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = InitNPURuntime(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init NPU runtime failed."; | |||||
| is_running_.store(false); | |||||
| return ret; | |||||
| } | |||||
| executor_ = new (std::nothrow) Executor(); | executor_ = new (std::nothrow) Executor(); | ||||
| if (nullptr == executor_) { | if (nullptr == executor_) { | ||||
| MS_LOG(ERROR) << "New Executor failed"; | MS_LOG(ERROR) << "New Executor failed"; | ||||
| @@ -661,18 +655,6 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int LiteSession::InitNPURuntime() { | |||||
| #if SUPPORT_NPU | |||||
| if (this->context_->IsNpuEnabled()) { | |||||
| if (mindspore::lite::NPUManager::GetInstance()->InitClient() != RET_OK) { | |||||
| MS_LOG(ERROR) << "NPU client init error."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| #endif | |||||
| return RET_OK; | |||||
| } | |||||
| int LiteSession::InitGPURuntime() { | int LiteSession::InitGPURuntime() { | ||||
| #if SUPPORT_GPU | #if SUPPORT_GPU | ||||
| if (this->context_->IsGpuEnabled()) { | if (this->context_->IsGpuEnabled()) { | ||||
| @@ -103,8 +103,6 @@ class LiteSession : public session::LiteSession { | |||||
| private: | private: | ||||
| void ResetInputsShape(const std::vector<std::vector<int>> &dims); | void ResetInputsShape(const std::vector<std::vector<int>> &dims); | ||||
| int InitNPURuntime(); | |||||
| int InitGPURuntime(); | int InitGPURuntime(); | ||||
| protected: | protected: | ||||
| @@ -17,10 +17,9 @@ | |||||
| #include "src/runtime/agent/npu/npu_executor.h" | #include "src/runtime/agent/npu/npu_executor.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/runtime/agent/npu/npu_manager.h" | #include "src/runtime/agent/npu/npu_manager.h" | ||||
| #include "nnacl/pack.h" | |||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { | int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { | ||||
| this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(); | |||||
| this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_); | |||||
| if (this->client_ == nullptr) { | if (this->client_ == nullptr) { | ||||
| MS_LOG(ERROR) << "client is nullptr."; | MS_LOG(ERROR) << "client is nullptr."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -33,9 +32,8 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { | |||||
| } | } | ||||
| int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | ||||
| const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw, | |||||
| const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator, const KernelCallBack &before, | |||||
| const KernelCallBack &after) { | |||||
| const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator, | |||||
| const KernelCallBack &before, const KernelCallBack &after) { | |||||
| hiai::AiContext context; | hiai::AiContext context; | ||||
| for (int i = 0; i < npu_input_tensors_.size(); ++i) { | for (int i = 0; i < npu_input_tensors_.size(); ++i) { | ||||
| void *data = in_tensors[i]->data_c(); | void *data = in_tensors[i]->data_c(); | ||||
| @@ -43,12 +41,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector< | |||||
| MS_LOG(ERROR) << model_name_ << " inputs data is nullptr"; | MS_LOG(ERROR) << model_name_ << " inputs data is nullptr"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (inputs_nhwc2nchw[i]) { | |||||
| PackNHWCToNCHWFp32(data, npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->Batch(), | |||||
| in_tensors[i]->Width() * in_tensors[i]->Height(), in_tensors[i]->Channel()); | |||||
| } else { | |||||
| memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size()); | |||||
| } | |||||
| memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size()); | |||||
| } | } | ||||
| context.AddPara("model_name", model_name_); | context.AddPara("model_name", model_name_); | ||||
| if (this->client_ == nullptr) { | if (this->client_ == nullptr) { | ||||
| @@ -68,12 +61,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector< | |||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (outputs_nchw2nhwc[i]) { | |||||
| PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(), | |||||
| out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel()); | |||||
| } else { | |||||
| memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize()); | |||||
| } | |||||
| memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize()); | |||||
| out_tensors[i]->ResetRefCount(); | out_tensors[i]->ResetRefCount(); | ||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -32,8 +32,7 @@ class NPUExecutor : public Executor { | |||||
| int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override; | int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override; | ||||
| int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | ||||
| const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw, | |||||
| const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator = nullptr, | |||||
| const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr, | |||||
| const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr); | const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr); | ||||
| private: | private: | ||||
| @@ -0,0 +1,224 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "src/runtime/agent/npu/npu_fusion_pass.h" | |||||
| #include <vector> | |||||
| #include "src/lite_kernel.h" | |||||
| #include "nnacl/concat_parameter.h" | |||||
| namespace mindspore::lite { | |||||
| bool CheckFusion(kernel::LiteKernel *kernel) { | |||||
| auto pre_flag = | |||||
| std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *kernel) { | |||||
| return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc && kernel->out_kernels().size() == 1; | |||||
| }); | |||||
| if (!pre_flag) { | |||||
| return false; | |||||
| } | |||||
| auto post_flag = | |||||
| std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) { | |||||
| return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw && kernel->in_kernels().size() == 1; | |||||
| }); | |||||
| return post_flag; | |||||
| } | |||||
| void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) { | |||||
| for (auto in_kernel : cur_kernel->in_kernels()) { | |||||
| auto pre_kernel = in_kernel->in_kernels()[0]; | |||||
| auto pre_out_kernels = pre_kernel->out_kernels(); | |||||
| for (size_t i = 0; i < pre_out_kernels.size(); i++) { | |||||
| if (pre_out_kernels[i] == in_kernel) { | |||||
| pre_out_kernels[i] = cur_kernel; | |||||
| break; | |||||
| } | |||||
| } | |||||
| pre_kernel->set_out_kernels(pre_out_kernels); | |||||
| auto cur_in_kernels = cur_kernel->in_kernels(); | |||||
| for (size_t i = 0; i < cur_in_kernels.size(); i++) { | |||||
| if (cur_in_kernels[i] == in_kernel) { | |||||
| cur_in_kernels[i] = pre_kernel; | |||||
| break; | |||||
| } | |||||
| } | |||||
| cur_kernel->set_in_kernels(cur_in_kernels); | |||||
| kernels->erase(find(kernels->begin(), kernels->end(), in_kernel)); | |||||
| } | |||||
| } | |||||
| void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) { | |||||
| for (auto out_kernel : cur_kernel->out_kernels()) { | |||||
| auto post_kernel = out_kernel->out_kernels()[0]; | |||||
| auto post_in_kernels = post_kernel->in_kernels(); | |||||
| for (size_t i = 0; i < post_in_kernels.size(); i++) { | |||||
| if (post_in_kernels[i] == out_kernel) { | |||||
| post_in_kernels[i] = cur_kernel; | |||||
| break; | |||||
| } | |||||
| } | |||||
| post_kernel->set_in_kernels(post_in_kernels); | |||||
| auto cur_out_kernels = cur_kernel->out_kernels(); | |||||
| for (size_t i = 0; i < cur_out_kernels.size(); i++) { | |||||
| if (cur_out_kernels[i] == out_kernel) { | |||||
| cur_out_kernels[i] = post_kernel; | |||||
| break; | |||||
| } | |||||
| } | |||||
| cur_kernel->set_out_kernels(cur_out_kernels); | |||||
| kernels->erase(find(kernels->begin(), kernels->end(), out_kernel)); | |||||
| } | |||||
| } | |||||
| void UpdatePreTensors(kernel::LiteKernel *cur_kernel) { | |||||
| auto tensors_vec = cur_kernel->in_tensors(); | |||||
| for (auto in_kernel : cur_kernel->in_kernels()) { | |||||
| lite::Tensor *cur_tensor = nullptr; | |||||
| auto in_tensor = in_kernel->in_tensors()[0]; | |||||
| auto out_tensor = in_kernel->out_tensors()[0]; | |||||
| auto pre_kernel = in_kernel->in_kernels()[0]; | |||||
| for (size_t i = 0; i < pre_kernel->out_tensors().size(); i++) { | |||||
| if (pre_kernel->out_tensors()[i] == in_tensor) { | |||||
| cur_tensor = pre_kernel->out_tensors()[i]; | |||||
| } | |||||
| } | |||||
| for (size_t i = 0; i < tensors_vec.size(); i++) { | |||||
| if (tensors_vec[i] == out_tensor) { | |||||
| tensors_vec[i] = cur_tensor; | |||||
| } | |||||
| } | |||||
| } | |||||
| cur_kernel->set_in_tensors(tensors_vec); | |||||
| } | |||||
| void UpdatePostTensors(kernel::LiteKernel *cur_kernel) { | |||||
| auto tensors_vec = cur_kernel->out_tensors(); | |||||
| for (auto out_kernel : cur_kernel->out_kernels()) { | |||||
| auto in_tensor = out_kernel->in_tensors()[0]; | |||||
| auto out_tensor = out_kernel->out_tensors()[0]; | |||||
| auto post_kernel = out_kernel->out_kernels()[0]; | |||||
| lite::Tensor *cur_tensor = nullptr; | |||||
| for (size_t i = 0; i < post_kernel->in_tensors().size(); i++) { | |||||
| if (post_kernel->in_tensors()[i] == out_tensor) { | |||||
| cur_tensor = post_kernel->in_tensors()[i]; | |||||
| } | |||||
| } | |||||
| for (size_t i = 0; i < tensors_vec.size(); i++) { | |||||
| if (tensors_vec[i] == in_tensor) { | |||||
| tensors_vec[i] = cur_tensor; | |||||
| } | |||||
| } | |||||
| } | |||||
| cur_kernel->set_out_tensors(tensors_vec); | |||||
| } | |||||
| int TransFormAxis(int axis) { | |||||
| switch (axis) { | |||||
| case 0: | |||||
| return 0; | |||||
| case 1: | |||||
| return 2; | |||||
| case 2: | |||||
| return 3; | |||||
| case 3: | |||||
| case -1: | |||||
| return 1; | |||||
| default: | |||||
| return -2; | |||||
| } | |||||
| } | |||||
| int NPUFusionPass::AddFusion(kernel::LiteKernel *kernel) { | |||||
| if (!CheckFusion(kernel)) { | |||||
| return RET_OK; | |||||
| } | |||||
| UpdatePreTensors(kernel); | |||||
| UpdatePostTensors(kernel); | |||||
| UpdatePreKernels(kernel); | |||||
| UpdatePostKernels(kernel); | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUFusionPass::ConcatFusion(kernel::LiteKernel *kernel) { | |||||
| if (!CheckFusion(kernel)) { | |||||
| return RET_OK; | |||||
| } | |||||
| UpdatePreTensors(kernel); | |||||
| UpdatePostTensors(kernel); | |||||
| UpdatePreKernels(kernel); | |||||
| UpdatePostKernels(kernel); | |||||
| auto concat_param = reinterpret_cast<ConcatParameter *>(kernel->op_parameter()); | |||||
| concat_param->axis_ = TransFormAxis(concat_param->axis_); | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) { | |||||
| if (kernel->out_kernels().empty()) { | |||||
| return RET_OK; | |||||
| } | |||||
| if (!std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) { | |||||
| return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; | |||||
| })) { | |||||
| return RET_OK; | |||||
| } | |||||
| auto pre_kernel = kernel->in_kernels()[0]; | |||||
| auto pre_out_kernels = pre_kernel->out_kernels(); | |||||
| for (size_t i = 0; i < pre_out_kernels.size(); i++) { | |||||
| if (pre_out_kernels[i] == kernel) { | |||||
| pre_out_kernels.erase(pre_out_kernels.begin() + i); | |||||
| break; | |||||
| } | |||||
| } | |||||
| for (const auto &nc2nh : kernel->out_kernels()) { | |||||
| for (const auto &post_kernel : nc2nh->out_kernels()) { | |||||
| auto post_in_kernels = post_kernel->in_kernels(); | |||||
| for (size_t i = 0; i < post_in_kernels.size(); i++) { | |||||
| if (post_in_kernels[i] == nc2nh) { | |||||
| post_in_kernels[i] = pre_kernel; | |||||
| break; | |||||
| } | |||||
| } | |||||
| post_kernel->set_in_kernels(post_in_kernels); | |||||
| pre_out_kernels.push_back(post_kernel); | |||||
| } | |||||
| kernels->erase(find(kernels->begin(), kernels->end(), nc2nh)); | |||||
| } | |||||
| pre_kernel->set_out_kernels(pre_out_kernels); | |||||
| kernels->erase(find(kernels->begin(), kernels->end(), kernel)); | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUFusionPass::Fusion() { | |||||
| for (auto kernel : *kernels) { | |||||
| switch (kernel->Type()) { | |||||
| case schema::PrimitiveType_Concat: | |||||
| ConcatFusion(kernel); | |||||
| continue; | |||||
| case schema::PrimitiveType_Add: | |||||
| AddFusion(kernel); | |||||
| continue; | |||||
| case schema::PrimitiveType_Nchw2Nhwc: | |||||
| FormatFusion(kernel); | |||||
| continue; | |||||
| default: | |||||
| continue; | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| } // namespace mindspore::lite | |||||
| @@ -0,0 +1,40 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_ | |||||
| #include <vector> | |||||
| #include "src/lite_kernel.h" | |||||
| #include "src/ops/primitive_c.h" | |||||
| namespace mindspore::lite { | |||||
| class NPUFusionPass { | |||||
| public: | |||||
| explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; } | |||||
| ~NPUFusionPass() = default; | |||||
| int Fusion(); | |||||
| protected: | |||||
| int ConcatFusion(kernel::LiteKernel *kernel); | |||||
| int AddFusion(kernel::LiteKernel *kernel); | |||||
| int FormatFusion(kernel::LiteKernel *kernel); | |||||
| void UpdatePreKernels(kernel::LiteKernel *kernel); | |||||
| void UpdatePostKernels(kernel::LiteKernel *kernel); | |||||
| private: | |||||
| std::vector<kernel::LiteKernel *> *kernels; | |||||
| }; | |||||
| } // namespace mindspore::lite | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_ | |||||
| @@ -15,57 +15,65 @@ | |||||
| */ | */ | ||||
| #include "src/runtime/agent/npu/npu_manager.h" | #include "src/runtime/agent/npu/npu_manager.h" | ||||
| #include <sys/system_properties.h> | |||||
| #include <sys/fcntl.h> | #include <sys/fcntl.h> | ||||
| #include <unistd.h> | #include <unistd.h> | ||||
| #include "include/hiai_ir_build.h" | #include "include/hiai_ir_build.h" | ||||
| #include "include/HiAiModelManagerService.h" | #include "include/HiAiModelManagerService.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "include/graph/op/all_ops.h" | |||||
| #include "src/common/file_utils.h" | #include "src/common/file_utils.h" | ||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| #define MAX_MODEL_NUM 20 | |||||
| int NPUManager::CompareVersion(const string &version1, const string &version2) { | |||||
| std::istringstream iss1(version1); | |||||
| std::istringstream iss2(version2); | |||||
| string string1; | |||||
| string string2; | |||||
| while (!iss1.eof() || !iss2.eof()) { | |||||
| getline(iss1, string1, '.'); | |||||
| getline(iss2, string2, '.'); | |||||
| if (stoi(string1) > stoi(string2)) return 1; | |||||
| if (stoi(string1) < stoi(string2)) return -1; | |||||
| string1 = string2 = "0"; | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| bool NPUManager::IsSupportNPU() { | |||||
| if (!is_npu_check_executor) { | |||||
| CheckSupportNPU(); | |||||
| } | |||||
| if (is_support_npu) { | |||||
| MS_LOG(INFO) << "The current device support NPU."; | |||||
| return true; | |||||
| } else { | |||||
| MS_LOG(INFO) << "The current device NOT SUPPORT NPU."; | |||||
| return false; | |||||
| bool NPUManager::CheckEMUIVersion() { | |||||
| char emui[128] = {0x00}; | |||||
| __system_property_get("ro.build.version.emui", emui); | |||||
| std::string emui_str = emui; | |||||
| int pos = emui_str.find('_'); | |||||
| if (pos != std::string::npos) { | |||||
| auto version = emui_str.substr(pos + 1); | |||||
| int ret = CompareVersion(version, "11.0.0"); | |||||
| if (ret < 0) { | |||||
| return false; | |||||
| } | |||||
| } | } | ||||
| return true; | |||||
| } | } | ||||
| std::string NPUManager::GetExecutorPath() { | |||||
| std::string executor_path; | |||||
| char cmdline[1024] = {0}; | |||||
| int fd = open("/proc/self/cmdline", O_RDONLY); | |||||
| if (fd >= 0) { | |||||
| char ch; | |||||
| int i = 0; | |||||
| while (read(fd, &ch, sizeof(ch)) > 0 && !isspace(ch)) { | |||||
| if (':' == ch) { | |||||
| break; | |||||
| } | |||||
| cmdline[i] = ch; | |||||
| i++; | |||||
| bool NPUManager::CheckDDKVersion() { | |||||
| auto client = std::make_shared<hiai::AiModelMngerClient>(); | |||||
| if (client->GetVersion() != nullptr) { | |||||
| std::string version = client->GetVersion(); | |||||
| int ret = CompareVersion(version, "100.330.010.011"); | |||||
| if (ret < 0) { | |||||
| return false; | |||||
| } | } | ||||
| close(fd); | |||||
| } | } | ||||
| executor_path = std::string(cmdline); | |||||
| if (executor_path.empty()) { | |||||
| executor_path = "./"; | |||||
| } | |||||
| // android | |||||
| if (executor_path.substr(0, 11) == "/data/data/") { | |||||
| executor_path = executor_path + '/'; | |||||
| return true; | |||||
| } | |||||
| bool NPUManager::IsSupportNPU() { | |||||
| if (IsKirinChip() && CheckEMUIVersion() && CheckDDKVersion()) { | |||||
| MS_LOG(INFO) << "The current device support NPU."; | |||||
| return true; | |||||
| } else { | } else { | ||||
| // Linux | |||||
| executor_path = executor_path.substr(0, executor_path.rfind('/')) + "/"; | |||||
| MS_LOG(INFO) << "The current device NOT SUPPORT NPU."; | |||||
| return false; | |||||
| } | } | ||||
| return executor_path; | |||||
| } | } | ||||
| bool NPUManager::IsKirinChip() { | bool NPUManager::IsKirinChip() { | ||||
| @@ -96,86 +104,6 @@ bool NPUManager::IsKirinChip() { | |||||
| return false; | return false; | ||||
| } | } | ||||
| bool WriteToOMFile(domi::ModelBufferData om_model_buff, const std::string &om_file_path) { | |||||
| FILE *fp; | |||||
| fp = fopen(om_file_path.c_str(), "wb"); | |||||
| if (fp == nullptr) { | |||||
| MS_LOG(ERROR) << om_file_path.c_str() << " open failed."; | |||||
| return false; | |||||
| } | |||||
| auto write_size = (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp); | |||||
| if (write_size != om_model_buff.length) { | |||||
| fclose(fp); | |||||
| MS_LOG(ERROR) << "Write om file failed."; | |||||
| return false; | |||||
| } | |||||
| fclose(fp); | |||||
| return true; | |||||
| } | |||||
| bool NPUManager::CheckOmBuildIr(const std::string &path) { | |||||
| // build test om model | |||||
| std::shared_ptr<hiai::op::Add> add_op(new (std::nothrow) hiai::op::Add("add")); | |||||
| if (add_op == nullptr) { | |||||
| MS_LOG(ERROR) << "new add_op failed."; | |||||
| return false; | |||||
| } | |||||
| ge::TensorDesc desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT); | |||||
| std::shared_ptr<hiai::op::Data> data = std::make_shared<hiai::op::Data>("data"); | |||||
| data->update_input_desc_x(desc); | |||||
| add_op->set_input_x1(*data); | |||||
| add_op->set_input_x2(*data); | |||||
| domi::HiaiIrBuild ir_build; | |||||
| ge::Graph ir_graph("graph"); | |||||
| std::vector<ge::Operator> inputs{*data, *data}; | |||||
| std::vector<ge::Operator> outputs{*add_op}; | |||||
| ir_graph.SetInputs(inputs).SetOutputs(outputs); | |||||
| ge::Model om_model("test_model", "test_version"); | |||||
| om_model.SetGraph(ir_graph); | |||||
| domi::ModelBufferData om_model_buff; | |||||
| if (!ir_build.CreateModelBuff(om_model, om_model_buff)) { | |||||
| MS_LOG(ERROR) << "Create model buffer failed."; | |||||
| return false; | |||||
| } | |||||
| if (!ir_build.BuildIRModel(om_model, om_model_buff)) { | |||||
| MS_LOG(ERROR) << "Build IR model failed."; | |||||
| return false; | |||||
| } | |||||
| // save test om model | |||||
| remove(path.c_str()); | |||||
| bool ret = WriteToOMFile(om_model_buff, path); | |||||
| ir_build.ReleaseModelBuff(om_model_buff); | |||||
| return ret; | |||||
| } | |||||
| void NPUManager::CheckSupportNPU() { | |||||
| is_npu_check_executor = true; | |||||
| std::string path_string = GetExecutorPath(); | |||||
| std::string test_model_path = path_string + "/mindspore_lite_test_npu.om"; | |||||
| std::ifstream ifs(test_model_path); | |||||
| if (ifs.good() && ifs.is_open()) { | |||||
| ifs.close(); | |||||
| is_support_npu = true; | |||||
| return; | |||||
| } | |||||
| if (!IsKirinChip()) { | |||||
| MS_LOG(ERROR) << "The current device chip NOT SUPPORT NPU"; | |||||
| is_support_npu = false; | |||||
| return; | |||||
| } | |||||
| if (!CheckOmBuildIr(test_model_path)) { | |||||
| MS_LOG(ERROR) << "Build OM IR error."; | |||||
| is_support_npu = false; | |||||
| return; | |||||
| } | |||||
| is_support_npu = true; | |||||
| } | |||||
| int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) { | int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) { | ||||
| hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size); | hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size); | ||||
| if (buffer == nullptr) { | if (buffer == nullptr) { | ||||
| @@ -188,33 +116,42 @@ int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &mode | |||||
| model_desc_.push_back(desc); | model_desc_.push_back(desc); | ||||
| mc_builder_->MemBufferDestroy(buffer); | mc_builder_->MemBufferDestroy(buffer); | ||||
| model_map_.insert({model_name, index_}); | |||||
| index_++; | index_++; | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int NPUManager::InitClient() { | |||||
| this->client_ = std::make_shared<hiai::AiModelMngerClient>(); | |||||
| if (this->client_ == nullptr) { | |||||
| return RET_ERROR; | |||||
| } | |||||
| int ret = this->client_->Init(nullptr); | |||||
| if (ret != hiai::AI_SUCCESS) { | |||||
| return RET_ERROR; | |||||
| } | |||||
| mc_builder_ = std::make_shared<hiai::AiModelBuilder>(this->client_); | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUManager::LoadOMModel() { | int NPUManager::LoadOMModel() { | ||||
| int ret = this->client_->Load(model_desc_); | |||||
| if (ret != hiai::AI_SUCCESS) { | |||||
| MS_LOG(ERROR) << "Client load model failed." << ret; | |||||
| return RET_ERROR; | |||||
| for (int i = 0; i < index_ / MAX_MODEL_NUM + 1; i++) { | |||||
| auto client = std::make_shared<hiai::AiModelMngerClient>(); | |||||
| if (client == nullptr) { | |||||
| MS_LOG(ERROR) << "NPU client is nullptr."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| int ret = client->Init(nullptr); | |||||
| if (ret != hiai::AI_SUCCESS) { | |||||
| MS_LOG(ERROR) << "NPU client init failed. code is " << ret; | |||||
| return RET_ERROR; | |||||
| } | |||||
| mc_builder_ = std::make_shared<hiai::AiModelBuilder>(client); | |||||
| vector<std::shared_ptr<hiai::AiModelDescription>> desc(model_desc_.begin() + i * MAX_MODEL_NUM, | |||||
| ((i + 1) * MAX_MODEL_NUM > index_) | |||||
| ? model_desc_.begin() + index_ | |||||
| : model_desc_.begin() + (i + 1) * MAX_MODEL_NUM); | |||||
| ret = client->Load(desc); | |||||
| if (ret != hiai::AI_SUCCESS) { | |||||
| MS_LOG(ERROR) << "Client load model failed." << ret; | |||||
| return RET_ERROR; | |||||
| } | |||||
| clients_.push_back(client); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient() { return client_; } | |||||
| std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) { | |||||
| return clients_[model_map_[model_name] / MAX_MODEL_NUM]; | |||||
| } | |||||
| int NPUManager::index() { return index_; } | |||||
| int NPUManager::index() const { return index_; } | |||||
| } // namespace mindspore::lite | } // namespace mindspore::lite | ||||
| @@ -14,15 +14,21 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_ | |||||
| #include <string> | #include <string> | ||||
| #include <memory> | #include <memory> | ||||
| #include <vector> | #include <vector> | ||||
| #include <unordered_map> | |||||
| #include <set> | |||||
| #include "schema/model_generated.h" | |||||
| #include "include/HiAiModelManagerService.h" | #include "include/HiAiModelManagerService.h" | ||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = { | |||||
| schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, | |||||
| schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D, | |||||
| schema::PrimitiveType_Resize, schema::PrimitiveType_Pooling}; | |||||
| class NPUManager { | class NPUManager { | ||||
| public: | public: | ||||
| static NPUManager *GetInstance() { | static NPUManager *GetInstance() { | ||||
| @@ -32,8 +38,6 @@ class NPUManager { | |||||
| bool IsSupportNPU(); | bool IsSupportNPU(); | ||||
| int InitClient(); | |||||
| // provide to subgraph to add model. | // provide to subgraph to add model. | ||||
| int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency); | int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency); | ||||
| @@ -41,18 +45,18 @@ class NPUManager { | |||||
| int LoadOMModel(); | int LoadOMModel(); | ||||
| // provide to executor. | // provide to executor. | ||||
| std::shared_ptr<hiai::AiModelMngerClient> GetClient(); | |||||
| std::shared_ptr<hiai::AiModelMngerClient> GetClient(const std::string &model_name); | |||||
| int index(); | |||||
| int index() const; | |||||
| private: | private: | ||||
| void CheckSupportNPU(); | |||||
| bool IsKirinChip(); | bool IsKirinChip(); | ||||
| bool CheckOmBuildIr(const std::string &path); | |||||
| bool CheckEMUIVersion(); | |||||
| std::string GetExecutorPath(); | |||||
| bool CheckDDKVersion(); | |||||
| int CompareVersion(const std::string &version1, const std::string &version2); | |||||
| private: | private: | ||||
| int index_ = 0; | int index_ = 0; | ||||
| @@ -61,12 +65,14 @@ class NPUManager { | |||||
| bool is_support_npu = false; | bool is_support_npu = false; | ||||
| std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr; | |||||
| std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_; | |||||
| std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_; | std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_; | ||||
| std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr; | std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr; | ||||
| std::unordered_map<std::string, int> model_map_; | |||||
| }; | }; | ||||
| } // namespace mindspore::lite | } // namespace mindspore::lite | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_ | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_ | |||||
| @@ -0,0 +1,102 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "src/kernel_registry.h" | |||||
| #include "src/ops/nhwc2nchw.h" | |||||
| #include "src/ops/nchw2nhwc.h" | |||||
| #include "src/runtime/agent/npu/npu_pass_utils.h" | |||||
| namespace mindspore::lite { | |||||
| using kernel::KERNEL_ARCH::kCPU; | |||||
| using kernel::KERNEL_ARCH::kNPU; | |||||
| PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() { | |||||
| flatbuffers::FlatBufferBuilder fbb(1024); | |||||
| auto val_offset = schema::CreateNchw2Nhwc(fbb); | |||||
| auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nchw2Nhwc, val_offset.o); | |||||
| fbb.Finish(prim_offset); | |||||
| auto buf = fbb.GetBufferPointer(); | |||||
| if (buf == nullptr) { | |||||
| MS_LOG(ERROR) << "GetBufferPointer return nullptr"; | |||||
| fbb.Clear(); | |||||
| return nullptr; | |||||
| } | |||||
| auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize())); | |||||
| if (primitive_buf == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc primitive_buf_ failed."; | |||||
| fbb.Clear(); | |||||
| return nullptr; | |||||
| } | |||||
| memcpy(primitive_buf, buf, fbb.GetSize()); | |||||
| auto *primitive = PrimitiveC::NewPrimitiveC<Nchw2Nhwc>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf)); | |||||
| free(primitive_buf); | |||||
| fbb.Clear(); | |||||
| return primitive; | |||||
| } | |||||
| PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() { | |||||
| flatbuffers::FlatBufferBuilder fbb(1024); | |||||
| auto val_offset = schema::CreateNhwc2Nchw(fbb); | |||||
| auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nhwc2Nchw, val_offset.o); | |||||
| fbb.Finish(prim_offset); | |||||
| auto buf = fbb.GetBufferPointer(); | |||||
| if (buf == nullptr) { | |||||
| MS_LOG(ERROR) << "GetBufferPointer return nullptr"; | |||||
| fbb.Clear(); | |||||
| return nullptr; | |||||
| } | |||||
| auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize())); | |||||
| if (primitive_buf == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc primitive_buf_ failed."; | |||||
| fbb.Clear(); | |||||
| return nullptr; | |||||
| } | |||||
| memcpy(primitive_buf, buf, fbb.GetSize()); | |||||
| auto *primitive = PrimitiveC::NewPrimitiveC<Nhwc2Nchw>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf)); | |||||
| free(primitive_buf); | |||||
| fbb.Clear(); | |||||
| return primitive; | |||||
| } | |||||
| kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, | |||||
| const InnerContext *ctx, const std::string &name) { | |||||
| kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nchw2Nhwc}; | |||||
| auto nchw2nhwc_primitive = CreateNchw2NhwcPrimitive(); | |||||
| auto *nchw2nhwc_kernel = | |||||
| KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nchw2nhwc_primitive, ctx, key); | |||||
| nchw2nhwc_kernel->set_name(name); | |||||
| return nchw2nhwc_kernel; | |||||
| } | |||||
| kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors, | |||||
| const std::vector<Tensor *> &out_tensors, | |||||
| const InnerContext *ctx, const std::string &name) { | |||||
| kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nhwc2Nchw}; | |||||
| auto nhwc2nchw_primitive = CreateNhwc2NchwPrimitive(); | |||||
| auto *nhwc2nchw_kernel = | |||||
| KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nhwc2nchw_primitive, ctx, key); | |||||
| nhwc2nchw_kernel->set_name(name); | |||||
| return nhwc2nchw_kernel; | |||||
| } | |||||
| void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels, | |||||
| const std::vector<kernel::LiteKernel *> &out_kernels, | |||||
| const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors) { | |||||
| kernel->set_in_tensors(in_tensors); | |||||
| kernel->set_out_tensors(out_tensors); | |||||
| kernel->set_in_kernels(in_kernels); | |||||
| kernel->set_out_kernels(out_kernels); | |||||
| } | |||||
| } // namespace mindspore::lite | |||||
| @@ -0,0 +1,44 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_ | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include "src/ops/primitive_c.h" | |||||
| #include "src/lite_kernel.h" | |||||
| namespace mindspore::lite { | |||||
// Static helpers used by the NPU transform pass to build NHWC<->NCHW layout
// transform kernels and to rewire graph links after inserting them.
class NPUPassUtils {
 public:
  // Creates a CPU Nchw2Nhwc kernel named `name` over the given tensors.
  static kernel::LiteKernel *CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
                                                   const std::string &name);
  // Creates a CPU Nhwc2Nchw kernel named `name` over the given tensors.
  static kernel::LiteKernel *CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
                                                   const std::string &name);
  // Rebinds `kernel`'s in/out kernel lists and in/out tensor lists in one call.
  static void UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
                           const std::vector<kernel::LiteKernel *> &out_kernels,
                           const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors);

 private:
  // Flatbuffer-backed primitive factories; each returns a caller-owned
  // PrimitiveC or nullptr on failure.
  static PrimitiveC *CreateNchw2NhwcPrimitive();
  static PrimitiveC *CreateNhwc2NchwPrimitive();
};
| } // namespace mindspore::lite | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_ | |||||
| @@ -0,0 +1,201 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "src/runtime/agent/npu/npu_transform_pass.h" | |||||
| #include <vector> | |||||
| #include "src/lite_kernel.h" | |||||
| #include "src/runtime/agent/npu/npu_manager.h" | |||||
| #include "src/runtime/agent/npu/npu_pass_utils.h" | |||||
| namespace mindspore::lite { | |||||
| using kernel::KERNEL_ARCH::kCPU; | |||||
| using kernel::KERNEL_ARCH::kNPU; | |||||
| int NPUTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *after_kernel) { | |||||
| std::vector<kernel::LiteKernel *> out_kernels; | |||||
| for (auto out_kernel : kernel->out_kernels()) { | |||||
| if (out_kernel == after_kernel) { | |||||
| out_kernels.push_back(trans_kernel); | |||||
| } else { | |||||
| out_kernels.push_back(out_kernel); | |||||
| } | |||||
| } | |||||
| NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors()); | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUTransformPass::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *before_kernel) { | |||||
| std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]}; | |||||
| for (int i = 1; i < kernel->in_tensors().size(); i++) { | |||||
| cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]); | |||||
| } | |||||
| std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel}; | |||||
| for (int i = 0; i < kernel->in_kernels().size(); i++) { | |||||
| auto in_kernel = kernel->in_kernels()[i]; | |||||
| if (in_kernel != kernel) { | |||||
| cur_in_kernels.push_back(in_kernel); | |||||
| } | |||||
| } | |||||
| NPUPassUtils::UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors, | |||||
| kernel->out_tensors()); | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||||
| std::vector<kernel::LiteKernel *> *all_kernels, | |||||
| std::vector<Tensor *> *all_tensors) { | |||||
| auto kernel = *it; | |||||
| bool is_input_kernel = kernel->in_kernels().empty(); | |||||
| if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU || | |||||
| npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) { | |||||
| kernel::LiteKernel *before_kernel = nullptr; | |||||
| if (!is_input_kernel) { | |||||
| before_kernel = kernel->in_kernels()[0]; | |||||
| } | |||||
| // Create pre transform kernel out tensors. | |||||
| std::vector<int> shapes{kernel->in_tensors()[0]->shape()[0], kernel->in_tensors()[0]->shape()[3], | |||||
| kernel->in_tensors()[0]->shape()[1], kernel->in_tensors()[0]->shape()[2]}; | |||||
| auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), shapes, schema::Format_NCHW, Tensor::VAR); | |||||
| std::vector<Tensor *> pre_trans_out_tensors = {tensor}; | |||||
| all_tensors->push_back(pre_trans_out_tensors[0]); | |||||
| // Replace the output tensor of the previous node | |||||
| auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++); | |||||
| auto *pre_trans_kernel = | |||||
| NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name); | |||||
| // Insert Nhwc2Nchw into the front of the current queue | |||||
| all_kernels->push_back(pre_trans_kernel); | |||||
| // Replace the output kernel of the previous node | |||||
| std::vector<kernel::LiteKernel *> pre_trans_in_kernel; | |||||
| if (is_input_kernel) { | |||||
| pre_trans_in_kernel = {}; | |||||
| } else { | |||||
| pre_trans_in_kernel = {before_kernel}; | |||||
| } | |||||
| NPUPassUtils::UpdateKernel(pre_trans_kernel, pre_trans_in_kernel, {kernel}, {kernel->in_tensors()[0]}, | |||||
| pre_trans_out_tensors); | |||||
| if (before_kernel != nullptr) { | |||||
| UpdateNH2NCTransNodePreKernel(before_kernel, pre_trans_kernel, kernel); | |||||
| } | |||||
| UpdateNH2NCTransNodeAfterKernel(kernel, pre_trans_kernel, before_kernel); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||||
| std::vector<kernel::LiteKernel *> *all_kernels, | |||||
| std::vector<Tensor *> *all_tensors) { | |||||
| auto kernel = *it; | |||||
| // Single output multiple references | |||||
| for (int i = 0; i < kernel->out_kernels().size(); i++) { | |||||
| auto next_kernel = kernel->out_kernels().at(i); | |||||
| if (next_kernel->desc().arch == kNPU && npu_trans_nodes.find(next_kernel->Type()) != npu_trans_nodes.end()) { | |||||
| continue; | |||||
| } | |||||
| // Change format the output of the current kernel nhwc->nchw | |||||
| auto shapes = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[1], | |||||
| kernel->out_tensors()[0]->shape()[2], kernel->out_tensors()[0]->shape()[3]}; | |||||
| auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), shapes, schema::Format_NHWC, Tensor::VAR); | |||||
| std::vector<Tensor *> post_trans_out_tensors = {tensor}; | |||||
| all_tensors->push_back(post_trans_out_tensors[0]); | |||||
| // Use the output tensor of the current node as the input tensor of the post-conversion operator | |||||
| auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++); | |||||
| auto *post_trans_kernel = | |||||
| NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors, context, name); | |||||
| // Replace the input tensor of the next node | |||||
| NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {next_kernel}, kernel->out_tensors(), | |||||
| post_trans_out_tensors); | |||||
| // Directly insert in the back, will not affect the topological sort | |||||
| all_kernels->push_back(post_trans_kernel); | |||||
| UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel); | |||||
| UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUTransformPass::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *next_kernel) { | |||||
| std::vector<kernel::LiteKernel *> cur_out_kernels; | |||||
| for (auto out_kernel : kernel->out_kernels()) { | |||||
| if (out_kernel == next_kernel) { | |||||
| cur_out_kernels.push_back(trans_kernel); | |||||
| } else { | |||||
| cur_out_kernels.push_back(out_kernel); | |||||
| } | |||||
| } | |||||
| auto kernel_out_tensor = kernel->out_tensors()[0]; | |||||
| // Change format the output of the current kernel nhwc->nchw | |||||
| std::vector<int> kernel_out_new_shapes = {kernel_out_tensor->shape()[0], kernel_out_tensor->shape()[3], | |||||
| kernel_out_tensor->shape()[1], kernel_out_tensor->shape()[2]}; | |||||
| kernel_out_tensor->set_format(schema::Format_NCHW); | |||||
| kernel_out_tensor->set_shape(kernel_out_new_shapes); | |||||
| NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor}); | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *next_kernel) { | |||||
| std::vector<Tensor *> next_in_tensors; | |||||
| for (auto next_in_tensor : next_kernel->in_tensors()) { | |||||
| if (next_in_tensor != kernel->out_tensors()[0]) { | |||||
| next_in_tensors.push_back(next_in_tensor); | |||||
| } else { | |||||
| next_in_tensors.push_back(trans_kernel->out_tensors()[0]); | |||||
| } | |||||
| } | |||||
| next_kernel->set_in_tensors(next_in_tensors); | |||||
| std::vector<kernel::LiteKernel *> next_in_kernels; | |||||
| for (auto in_kernel : next_kernel->in_kernels()) { | |||||
| if (in_kernel == kernel) { | |||||
| next_in_kernels.push_back(trans_kernel); | |||||
| } else { | |||||
| next_in_kernels.push_back(in_kernel); | |||||
| } | |||||
| } | |||||
| NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors, | |||||
| next_kernel->out_tensors()); | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels, | |||||
| std::vector<Tensor *> *all_tensors) { | |||||
| if (context->IsNpuEnabled()) { | |||||
| std::vector<kernel::LiteKernel *> new_kernels; | |||||
| for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) { | |||||
| auto kernel = *it; | |||||
| if (kernel->desc().arch != kNPU) { | |||||
| new_kernels.push_back(kernel); | |||||
| continue; | |||||
| } | |||||
| if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) { | |||||
| InsertPreNode(context, it, &new_kernels, all_tensors); | |||||
| new_kernels.push_back(kernel); | |||||
| InsertPostNode(context, it, &new_kernels, all_tensors); | |||||
| } else { | |||||
| new_kernels.push_back(kernel); | |||||
| } | |||||
| } | |||||
| all_kernels->clear(); | |||||
| for (int i = 0; i < new_kernels.size(); i++) { | |||||
| all_kernels->push_back(new_kernels[i]); | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| } // namespace mindspore::lite | |||||
| @@ -0,0 +1,51 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_ | |||||
| #include <vector> | |||||
| #include "src/lite_kernel.h" | |||||
| #include "src/ops/primitive_c.h" | |||||
| namespace mindspore::lite { | |||||
| class NPUTransformPass { | |||||
| public: | |||||
| int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels, | |||||
| std::vector<Tensor *> *all_tensors); | |||||
| private: | |||||
| int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *after_kernel); | |||||
| int UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *before_kernel); | |||||
| int UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *after_kernel); | |||||
| int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *next_kernel); | |||||
| int InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||||
| std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors); | |||||
| int InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||||
| std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors); | |||||
| private: | |||||
| int total = 0; | |||||
| }; | |||||
| } // namespace mindspore::lite | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_ | |||||
| @@ -24,7 +24,6 @@ | |||||
| #include "include/graph/model.h" | #include "include/graph/model.h" | ||||
| #include "include/hiai_ir_build.h" | #include "include/hiai_ir_build.h" | ||||
| #include "include/HiAiModelManagerType.h" | #include "include/HiAiModelManagerType.h" | ||||
| #include "include/context.h" | |||||
| #include "include/version.h" | #include "include/version.h" | ||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | #include "src/runtime/agent/npu/npu_converter_utils.h" | ||||
| @@ -34,10 +33,6 @@ namespace mindspore::kernel { | |||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| std::set<schema::PrimitiveType> trans_nodes = {schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, | |||||
| schema::PrimitiveType_DepthwiseConv2D, | |||||
| schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_Resize}; | |||||
| domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() { | domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() { | ||||
| ge::Graph graph("NPUGraph"); | ge::Graph graph("NPUGraph"); | ||||
| @@ -75,8 +70,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() { | |||||
| } | } | ||||
| int SubGraphNpuKernel::Run() { | int SubGraphNpuKernel::Run() { | ||||
| return reinterpret_cast<lite::NPUExecutor *>(this->executor_) | |||||
| ->Run(in_tensors_, out_tensors_, nodes_, inputs_nhwc2nchw_, outputs_nchw2nhwc_); | |||||
| return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, nodes_); | |||||
| } | } | ||||
| int SubGraphNpuKernel::BuildNPUInputOp() { | int SubGraphNpuKernel::BuildNPUInputOp() { | ||||
| @@ -88,21 +82,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||||
| if (IsSubGraphInputTensor(in_tensor)) { | if (IsSubGraphInputTensor(in_tensor)) { | ||||
| auto tensor_name = node->name() + "_" + std::to_string(count++); | auto tensor_name = node->name() + "_" + std::to_string(count++); | ||||
| hiai::op::Data *data; | hiai::op::Data *data; | ||||
| if (trans_nodes.find(node->Type()) != trans_nodes.end()) { | |||||
| auto shape = in_tensor->shape(); | |||||
| data = new (std::nothrow) hiai::op::Data(tensor_name); | |||||
| if (data == nullptr) { | |||||
| MS_LOG(ERROR) << "New data failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({shape[0], shape[3], shape[1], shape[2]}), | |||||
| ge::FORMAT_NCHW, lite::ConverterToNPUDataType(in_tensor->data_type())); | |||||
| data->update_input_desc_x(tensor_desc); | |||||
| inputs_nhwc2nchw_.push_back(true); | |||||
| } else { | |||||
| data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name); | |||||
| inputs_nhwc2nchw_.push_back(false); | |||||
| } | |||||
| data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name); | |||||
| subgraph_input_op_.push_back(*data); | subgraph_input_op_.push_back(*data); | ||||
| node_input_op.push_back(data); | node_input_op.push_back(data); | ||||
| continue; | continue; | ||||
| @@ -132,7 +112,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||||
| // weight tensor | // weight tensor | ||||
| if (is_weight_tensor) { | if (is_weight_tensor) { | ||||
| if (trans_nodes.find(node->Type()) == trans_nodes.end()) { | |||||
| if (lite::npu_trans_nodes.find(node->Type()) == lite::npu_trans_nodes.end()) { | |||||
| auto name = node->name() + "_" + std::to_string(count++); | auto name = node->name() + "_" + std::to_string(count++); | ||||
| auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++)); | auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++)); | ||||
| if (weight_const == nullptr) { | if (weight_const == nullptr) { | ||||
| @@ -162,11 +142,6 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::Li | |||||
| ops.reserve(nodes.size()); | ops.reserve(nodes.size()); | ||||
| for (int i = 0; i < nodes.size(); i++) { | for (int i = 0; i < nodes.size(); i++) { | ||||
| ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp()); | ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp()); | ||||
| if (trans_nodes.find(schema::PrimitiveType(nodes[i]->GetPrimitive()->Type())) != trans_nodes.end()) { | |||||
| outputs_nchw2nhwc_.push_back(true); | |||||
| } else { | |||||
| outputs_nchw2nhwc_.push_back(false); | |||||
| } | |||||
| } | } | ||||
| return ops; | return ops; | ||||
| } | } | ||||
| @@ -69,10 +69,6 @@ class SubGraphNpuKernel : public SubGraphKernel { | |||||
| std::string GetOMModelName(); | std::string GetOMModelName(); | ||||
| private: | private: | ||||
| std::vector<bool> inputs_nhwc2nchw_; | |||||
| std::vector<bool> outputs_nchw2nhwc_; | |||||
| domi::ModelBufferData *model_buffer_data_; | domi::ModelBufferData *model_buffer_data_; | ||||
| std::vector<ge::Operator> subgraph_input_op_; | std::vector<ge::Operator> subgraph_input_op_; | ||||
| @@ -16,6 +16,7 @@ | |||||
| #include "src/runtime/kernel/npu/convolution_base_npu.h" | #include "src/runtime/kernel/npu/convolution_base_npu.h" | ||||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | #include "src/runtime/agent/npu/npu_converter_utils.h" | ||||
| #include "nnacl/pack.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| ConvolutionBaseNPUKernel::~ConvolutionBaseNPUKernel() { | ConvolutionBaseNPUKernel::~ConvolutionBaseNPUKernel() { | ||||
| @@ -39,14 +40,27 @@ int ConvolutionBaseNPUKernel::InitWeightBiasConst(const std::vector<lite::Tensor | |||||
| MS_LOG(ERROR) << "New weight const failed."; | MS_LOG(ERROR) << "New weight const failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto weight_shape = inputs[1]->shape(); | |||||
| inputs[1]->set_shape({weight_shape[0], weight_shape[3], weight_shape[1], weight_shape[2]}); | |||||
| inputs[1]->set_format(schema::Format_NCHW); | |||||
| auto weight_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]); | |||||
| weight_->set_attr_value(weight_tensor); | |||||
| auto w_shape = inputs[1]->shape(); | |||||
| auto nhwc_data = inputs[1]->data_c(); | |||||
| auto nchw_data = reinterpret_cast<float *>(malloc(inputs[1]->ElementsNum() * sizeof(float))); | |||||
| if (nchw_data == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| PackNHWCToNCHWFp32(nhwc_data, nchw_data, w_shape[0], w_shape[1] * w_shape[2], w_shape[3]); | |||||
| inputs[1]->set_shape(weight_shape); | |||||
| inputs[1]->set_format(schema::Format_NHWC); | |||||
| std::shared_ptr<ge::Tensor> weight_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor()); | |||||
| if (weight_tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "new weight_tensor failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({w_shape[0], w_shape[3], w_shape[1], w_shape[2]}), | |||||
| ge::FORMAT_NCHW, lite::ConverterToNPUDataType(inputs[1]->data_type())); | |||||
| weight_tensor->SetTensorDesc(tensor_desc); | |||||
| weight_tensor->SetData(reinterpret_cast<const uint8_t *>(nchw_data), inputs[1]->Size()); | |||||
| weight_->set_attr_value(weight_tensor); | |||||
| free(nchw_data); | |||||
| if (inputs.size() >= 3) { | if (inputs.size() >= 3) { | ||||
| bias_ = new (std::nothrow) hiai::op::Const(name_ + "_b"); | bias_ = new (std::nothrow) hiai::op::Const(name_ + "_b"); | ||||
| @@ -17,17 +17,18 @@ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CONVOLUTION_BASE_NPU_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CONVOLUTION_BASE_NPU_H_ | ||||
| #include <vector> | #include <vector> | ||||
| #include <memory> | |||||
| #include "include/graph/op/all_ops.h" | #include "include/graph/op/all_ops.h" | ||||
| #include "src/runtime/kernel/npu/transpose_base_npu.h" | #include "src/runtime/kernel/npu/transpose_base_npu.h" | ||||
| #include "nnacl/conv_parameter.h" | #include "nnacl/conv_parameter.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class ConvolutionBaseNPUKernel : public TransposeBaseNPUKernel { | |||||
| class ConvolutionBaseNPUKernel : public NPUKernel { | |||||
| public: | public: | ||||
| ConvolutionBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ConvolutionBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | ||||
| const mindspore::lite::PrimitiveC *primitive) | const mindspore::lite::PrimitiveC *primitive) | ||||
| : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~ConvolutionBaseNPUKernel() override; | ~ConvolutionBaseNPUKernel() override; | ||||
| protected: | protected: | ||||
| @@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ConvolutionDepthwiseNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, | int ConvolutionDepthwiseNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) { | const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) { | ||||
| return RET_ERROR; | |||||
| return RET_OK; | |||||
| } | } | ||||
| int ConvolutionDepthwiseNPUKernel::SetConvDwParam() { | int ConvolutionDepthwiseNPUKernel::SetConvDwParam() { | ||||
| @@ -49,19 +49,13 @@ int ConvolutionDepthwiseNPUKernel::SetConvDwParam() { | |||||
| int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, | const std::vector<lite::Tensor *> &outputs, | ||||
| const std::vector<ge::Operator *> &npu_inputs) { | const std::vector<ge::Operator *> &npu_inputs) { | ||||
| auto ret = SetPreTranspose(npu_inputs[0]); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| // set conv attr param | // set conv attr param | ||||
| conv_dw_ = new (std::nothrow) hiai::op::ConvolutionDepthwise(name_ + "_conv_depthwise"); | conv_dw_ = new (std::nothrow) hiai::op::ConvolutionDepthwise(name_ + "_conv_depthwise"); | ||||
| if (conv_dw_ == nullptr) { | if (conv_dw_ == nullptr) { | ||||
| MS_LOG(ERROR) << "New convolution depthwise operator for op " << name_ << " failed."; | MS_LOG(ERROR) << "New convolution depthwise operator for op " << name_ << " failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| ret = SetConvDwParam(); | |||||
| auto ret = SetConvDwParam(); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Set npu op parameter for convolution depthwise op " << name_ << " failed."; | MS_LOG(ERROR) << "Set npu op parameter for convolution depthwise op " << name_ << " failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -76,7 +70,7 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor * | |||||
| if (inputs.size() == 3) { | if (inputs.size() == 3) { | ||||
| conv_dw_->set_input_bias(*bias_); | conv_dw_->set_input_bias(*bias_); | ||||
| } | } | ||||
| conv_dw_->set_input_x(*pre_trans_); | |||||
| conv_dw_->set_input_x(*npu_inputs[0]); | |||||
| if (conv_param_->act_type_ != ActType_No) { | if (conv_param_->act_type_ != ActType_No) { | ||||
| ret = SetActivation(conv_dw_, conv_param_->act_type_); | ret = SetActivation(conv_dw_, conv_param_->act_type_); | ||||
| @@ -85,21 +79,17 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor * | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| } | } | ||||
| return RET_OK; | |||||
| } | |||||
| ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() { | |||||
| if (conv_param_->act_type_ == ActType_No) { | if (conv_param_->act_type_ == ActType_No) { | ||||
| ret = SetPostTranspose(conv_dw_); | |||||
| return conv_dw_; | |||||
| } else { | } else { | ||||
| ret = SetPostTranspose(act_); | |||||
| } | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| return act_; | |||||
| } | } | ||||
| return RET_OK; | |||||
| } | } | ||||
| ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() { return post_trans_; } | |||||
| ConvolutionDepthwiseNPUKernel::~ConvolutionDepthwiseNPUKernel() { | ConvolutionDepthwiseNPUKernel::~ConvolutionDepthwiseNPUKernel() { | ||||
| if (conv_dw_ != nullptr) { | if (conv_dw_ != nullptr) { | ||||
| delete conv_dw_; | delete conv_dw_; | ||||
| @@ -24,7 +24,7 @@ using mindspore::schema::PrimitiveType_Conv2D; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, | int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) { | const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) { | ||||
| return RET_ERROR; | |||||
| return RET_OK; | |||||
| } | } | ||||
| int ConvolutionNPUKernel::SetConvParam() { | int ConvolutionNPUKernel::SetConvParam() { | ||||
| @@ -49,19 +49,13 @@ int ConvolutionNPUKernel::SetConvParam() { | |||||
| int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, | const std::vector<lite::Tensor *> &outputs, | ||||
| const std::vector<ge::Operator *> &npu_inputs) { | const std::vector<ge::Operator *> &npu_inputs) { | ||||
| auto ret = SetPreTranspose(npu_inputs[0]); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| // set conv attr param | // set conv attr param | ||||
| conv_ = new (std::nothrow) hiai::op::Convolution(name_ + "_conv"); | conv_ = new (std::nothrow) hiai::op::Convolution(name_ + "_conv"); | ||||
| if (conv_ == nullptr) { | if (conv_ == nullptr) { | ||||
| MS_LOG(ERROR) << "New convolution operator for convolution op " << name_ << " failed."; | MS_LOG(ERROR) << "New convolution operator for convolution op " << name_ << " failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| ret = SetConvParam(); | |||||
| auto ret = SetConvParam(); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed."; | MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -76,7 +70,7 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs | |||||
| if (inputs.size() == 3) { | if (inputs.size() == 3) { | ||||
| conv_->set_input_bias(*bias_); | conv_->set_input_bias(*bias_); | ||||
| } | } | ||||
| conv_->set_input_x(*pre_trans_); | |||||
| conv_->set_input_x(*npu_inputs[0]); | |||||
| if (conv_param_->act_type_ != ActType_No) { | if (conv_param_->act_type_ != ActType_No) { | ||||
| ret = SetActivation(conv_, conv_param_->act_type_); | ret = SetActivation(conv_, conv_param_->act_type_); | ||||
| @@ -85,21 +79,17 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| } | } | ||||
| return RET_OK; | |||||
| } | |||||
| ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() { | |||||
| if (conv_param_->act_type_ == ActType_No) { | if (conv_param_->act_type_ == ActType_No) { | ||||
| ret = SetPostTranspose(conv_); | |||||
| return conv_; | |||||
| } else { | } else { | ||||
| ret = SetPostTranspose(act_); | |||||
| } | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| return act_; | |||||
| } | } | ||||
| return RET_OK; | |||||
| } | } | ||||
| ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() { return post_trans_; } | |||||
| ConvolutionNPUKernel::~ConvolutionNPUKernel() { | ConvolutionNPUKernel::~ConvolutionNPUKernel() { | ||||
| if (conv_ != nullptr) { | if (conv_ != nullptr) { | ||||
| delete conv_; | delete conv_; | ||||
| @@ -62,23 +62,17 @@ int PoolingNPUKernel::SetPoolingParam() { | |||||
| int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, | const std::vector<lite::Tensor *> &outputs, | ||||
| const std::vector<ge::Operator *> &npu_inputs) { | const std::vector<ge::Operator *> &npu_inputs) { | ||||
| auto ret = SetPreTranspose(npu_inputs[0]); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| pooling_ = new (std::nothrow) hiai::op::PoolingD(name_ + "_pooling"); | pooling_ = new (std::nothrow) hiai::op::PoolingD(name_ + "_pooling"); | ||||
| if (pooling_ == nullptr) { | if (pooling_ == nullptr) { | ||||
| MS_LOG(ERROR) << "New pooling npu operator for op " << name_ << " failed."; | MS_LOG(ERROR) << "New pooling npu operator for op " << name_ << " failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| ret = SetPoolingParam(); | |||||
| auto ret = SetPoolingParam(); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed."; | MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| pooling_->set_input_x(*pre_trans_); | |||||
| pooling_->set_input_x(*npu_inputs[0]); | |||||
| if (pooling_param_->act_type_ != ActType_No) { | if (pooling_param_->act_type_ != ActType_No) { | ||||
| ret = SetActivation(pooling_, pooling_param_->act_type_); | ret = SetActivation(pooling_, pooling_param_->act_type_); | ||||
| @@ -87,21 +81,17 @@ int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| } | } | ||||
| return RET_OK; | |||||
| } | |||||
| ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() { | |||||
| if (pooling_param_->act_type_ == ActType_No) { | if (pooling_param_->act_type_ == ActType_No) { | ||||
| ret = SetPostTranspose(pooling_); | |||||
| return pooling_; | |||||
| } else { | } else { | ||||
| ret = SetPostTranspose(act_); | |||||
| return act_; | |||||
| } | } | ||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | } | ||||
| ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() { return post_trans_; } | |||||
| PoolingNPUKernel::~PoolingNPUKernel() { | PoolingNPUKernel::~PoolingNPUKernel() { | ||||
| if (pooling_ != nullptr) { | if (pooling_ != nullptr) { | ||||
| delete pooling_; | delete pooling_; | ||||
| @@ -36,12 +36,6 @@ int ResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const | |||||
| int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | ||||
| const std::vector<ge::Operator *> &npu_inputs) { | const std::vector<ge::Operator *> &npu_inputs) { | ||||
| auto ret = SetPreTranspose(npu_inputs[0]); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32); | ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32); | ||||
| ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc); | ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc); | ||||
| vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)}; | vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)}; | ||||
| @@ -55,7 +49,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| op->set_attr_align_corners(align_corners_); | op->set_attr_align_corners(align_corners_); | ||||
| op->set_input_x(*pre_trans_); | |||||
| op->set_input_x(*npu_inputs[0]); | |||||
| op->set_input_size(*out_size); | op->set_input_size(*out_size); | ||||
| op->set_attr_half_pixel_centers(preserve_aspect_ratio_); | op->set_attr_half_pixel_centers(preserve_aspect_ratio_); | ||||
| op_ = op; | op_ = op; | ||||
| @@ -66,21 +60,14 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| op->set_attr_align_corners(align_corners_); | op->set_attr_align_corners(align_corners_); | ||||
| op->set_input_x(*pre_trans_); | |||||
| op->set_input_x(*npu_inputs[0]); | |||||
| op->set_input_size(*out_size); | op->set_input_size(*out_size); | ||||
| op_ = op; | op_ = op; | ||||
| } | } | ||||
| ret = SetPostTranspose(op_); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->post_trans_; } | |||||
| ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->op_; } | |||||
| ResizeNPUKernel::~ResizeNPUKernel() { | ResizeNPUKernel::~ResizeNPUKernel() { | ||||
| if (op_ != nullptr) { | if (op_ != nullptr) { | ||||
| @@ -24,12 +24,12 @@ | |||||
| #include "include/graph/op/all_ops.h" | #include "include/graph/op/all_ops.h" | ||||
| #include "src/runtime/kernel/npu/transpose_base_npu.h" | #include "src/runtime/kernel/npu/transpose_base_npu.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class ResizeNPUKernel : public TransposeBaseNPUKernel { | |||||
| class ResizeNPUKernel : public NPUKernel { | |||||
| public: | public: | ||||
| ResizeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ResizeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | ||||
| const mindspore::lite::PrimitiveC *primitive) | const mindspore::lite::PrimitiveC *primitive) | ||||
| : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter); | auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter); | ||||
| method_ = resize_parameter->method_; | method_ = resize_parameter->method_; | ||||
| new_height_ = resize_parameter->new_height_; | new_height_ = resize_parameter->new_height_; | ||||
| @@ -33,6 +33,8 @@ | |||||
| #if SUPPORT_NPU | #if SUPPORT_NPU | ||||
| #include "src/runtime/agent/npu/subgraph_npu_kernel.h" | #include "src/runtime/agent/npu/subgraph_npu_kernel.h" | ||||
| #include "src/runtime/agent/npu/npu_manager.h" | #include "src/runtime/agent/npu/npu_manager.h" | ||||
| #include "src/runtime/agent/npu/npu_transform_pass.h" | |||||
| #include "src/runtime/agent/npu/npu_fusion_pass.h" | |||||
| #endif | #endif | ||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| using kernel::KERNEL_ARCH::kCPU; | using kernel::KERNEL_ARCH::kCPU; | ||||
| @@ -63,6 +65,11 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| FindAllInoutKernels(*dst_kernels); | FindAllInoutKernels(*dst_kernels); | ||||
| ret = RunPass(dst_kernels); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Schedule run pass failed."; | |||||
| return ret; | |||||
| } | |||||
| ret = ConstructSubGraphs(dst_kernels); | ret = ConstructSubGraphs(dst_kernels); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConstructSubGraphs failed."; | MS_LOG(ERROR) << "ConstructSubGraphs failed."; | ||||
| @@ -514,4 +521,25 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker | |||||
| kernel->FindInoutKernels(kernels); | kernel->FindInoutKernels(kernels); | ||||
| } | } | ||||
| } | } | ||||
| int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) { | |||||
| int ret = RET_OK; | |||||
| #if SUPPORT_NPU | |||||
| auto transform_pass = new NPUTransformPass; | |||||
| ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_); | |||||
| delete transform_pass; | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Run npu format transform pass failed."; | |||||
| return ret; | |||||
| } | |||||
| auto fusion_pass = new NPUFusionPass(dst_kernels); | |||||
| ret = fusion_pass->Fusion(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Run npu fussion transform pass failed."; | |||||
| return ret; | |||||
| } | |||||
| delete fusion_pass; | |||||
| #endif | |||||
| return ret; | |||||
| } | |||||
| } // namespace mindspore::lite | } // namespace mindspore::lite | ||||
| @@ -77,6 +77,8 @@ class Scheduler { | |||||
| static kernel::SubGraphType GetKernelSubGraphType(const kernel::LiteKernel *kernel); | static kernel::SubGraphType GetKernelSubGraphType(const kernel::LiteKernel *kernel); | ||||
| int RunPass(std::vector<kernel::LiteKernel *> *dst_kernels); | |||||
| protected: | protected: | ||||
| const InnerContext *context_ = nullptr; | const InnerContext *context_ = nullptr; | ||||
| Model *src_model_ = nullptr; | Model *src_model_ = nullptr; | ||||