diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h
index d1466c4d27..680453bce5 100644
--- a/mindspore/lite/src/lite_kernel.h
+++ b/mindspore/lite/src/lite_kernel.h
@@ -95,6 +95,8 @@ class LiteKernel {
   virtual int Init() { return mindspore::lite::RET_ERROR; }
 
+  OpParameter *op_parameter() { return op_parameter_; }
+
   std::string name() const { return this->name_; }
 
   virtual int Train() {
diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc
index 58e019336c..e1019b5de0 100644
--- a/mindspore/lite/src/lite_session.cc
+++ b/mindspore/lite/src/lite_session.cc
@@ -479,12 +479,6 @@ int LiteSession::Init(const Context *context) {
     is_running_.store(false);
     return ret;
   }
-  ret = InitNPURuntime();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init NPU runtime failed.";
-    is_running_.store(false);
-    return ret;
-  }
   executor_ = new (std::nothrow) Executor();
   if (nullptr == executor_) {
     MS_LOG(ERROR) << "New Executor failed";
@@ -661,18 +655,6 @@ int LiteSession::Resize(const std::vector<tensor::MSTensor *> &inputs
   return RET_OK;
 }
 
-int LiteSession::InitNPURuntime() {
-#if SUPPORT_NPU
-  if (this->context_->IsNpuEnabled()) {
-    if (mindspore::lite::NPUManager::GetInstance()->InitClient() != RET_OK) {
-      MS_LOG(ERROR) << "NPU client init error.";
-      return RET_ERROR;
-    }
-  }
-#endif
-  return RET_OK;
-}
-
 int LiteSession::InitGPURuntime() {
 #if SUPPORT_GPU
   if (this->context_->IsGpuEnabled()) {
diff --git a/mindspore/lite/src/lite_session.h b/mindspore/lite/src/lite_session.h
index 002c3588eb..a24e5ff297 100644
--- a/mindspore/lite/src/lite_session.h
+++ b/mindspore/lite/src/lite_session.h
@@ -103,8 +103,6 @@ class LiteSession : public session::LiteSession {
  private:
   void ResetInputsShape(const std::vector<std::vector<int>> &dims);
 
-  int InitNPURuntime();
-
   int InitGPURuntime();
 
  protected:
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
index 6b50fe5743..1c5d70e711 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
@@ -17,10 +17,9 @@
 #include "src/runtime/agent/npu/npu_executor.h"
 #include "include/errorcode.h"
 #include "src/runtime/agent/npu/npu_manager.h"
-#include "nnacl/pack.h"
 namespace mindspore::lite {
 int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
-  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient();
+  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
   if (this->client_ == nullptr) {
     MS_LOG(ERROR) << "client is nullptr.";
     return RET_ERROR;
@@ -33,9 +32,8 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
 }
 
 int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
-                     const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw,
-                     const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator, const KernelCallBack &before,
-                     const KernelCallBack &after) {
+                     const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
+                     const KernelCallBack &before, const KernelCallBack &after) {
   hiai::AiContext context;
   for (int i = 0; i < npu_input_tensors_.size(); ++i) {
     void *data = in_tensors[i]->data_c();
@@ -43,12 +41,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
       MS_LOG(ERROR) << model_name_ << " inputs data is nullptr";
       return RET_ERROR;
     }
-    if (inputs_nhwc2nchw[i]) {
-      PackNHWCToNCHWFp32(data, npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->Batch(),
-                         in_tensors[i]->Width() * in_tensors[i]->Height(), in_tensors[i]->Channel());
-    } else {
-      memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size());
-    }
+    memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size());
   }
   context.AddPara("model_name", model_name_);
   if (this->client_ == nullptr) {
@@ -68,12 +61,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
-    if (outputs_nchw2nhwc[i]) {
-      PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(),
-                         out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel());
-    } else {
-      memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
-    }
+    memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
     out_tensors[i]->ResetRefCount();
   }
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.h b/mindspore/lite/src/runtime/agent/npu/npu_executor.h
index d3645f3143..899239e313 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.h
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.h
@@ -32,8 +32,7 @@ class NPUExecutor : public Executor {
   int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
 
   int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
-          const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw,
-          const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator = nullptr,
+          const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
           const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
 
  private:
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.cc b/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.cc
new file mode 100644
index 0000000000..0028fbceae
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.cc
@@ -0,0 +1,224 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/agent/npu/npu_fusion_pass.h"
+#include <vector>
+#include "src/lite_kernel.h"
+#include "nnacl/concat_parameter.h"
+
+namespace mindspore::lite {
+bool CheckFusion(kernel::LiteKernel *kernel) {
+  auto pre_flag =
+    std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *kernel) {
+      return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc && kernel->out_kernels().size() == 1;
+    });
+  if (!pre_flag) {
+    return false;
+  }
+  auto post_flag =
+    std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
+      return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw && kernel->in_kernels().size() == 1;
+    });
+  return post_flag;
+}
+
+void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) {
+  for (auto in_kernel : cur_kernel->in_kernels()) {
+    auto pre_kernel = in_kernel->in_kernels()[0];
+
+    auto pre_out_kernels = pre_kernel->out_kernels();
+    for (size_t i = 0; i < pre_out_kernels.size(); i++) {
+      if (pre_out_kernels[i] == in_kernel) {
+        pre_out_kernels[i] = cur_kernel;
+        break;
+      }
+    }
+    pre_kernel->set_out_kernels(pre_out_kernels);
+
+    auto cur_in_kernels = cur_kernel->in_kernels();
+    for (size_t i = 0; i < cur_in_kernels.size(); i++) {
+      if (cur_in_kernels[i] == in_kernel) {
+        cur_in_kernels[i] = pre_kernel;
+        break;
+      }
+    }
+    cur_kernel->set_in_kernels(cur_in_kernels);
+    kernels->erase(find(kernels->begin(), kernels->end(), in_kernel));
+  }
+}
+
+void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) {
+  for (auto out_kernel : cur_kernel->out_kernels()) {
+    auto post_kernel = out_kernel->out_kernels()[0];
+
+    auto post_in_kernels = post_kernel->in_kernels();
+    for (size_t i = 0; i < post_in_kernels.size(); i++) {
+      if (post_in_kernels[i] == out_kernel) {
+        post_in_kernels[i] = cur_kernel;
+        break;
+      }
+    }
+    post_kernel->set_in_kernels(post_in_kernels);
+
+    auto cur_out_kernels = cur_kernel->out_kernels();
+    for (size_t i = 0; i < cur_out_kernels.size(); i++) {
+      if (cur_out_kernels[i] == out_kernel) {
+        cur_out_kernels[i] = post_kernel;
+        break;
+      }
+    }
+    cur_kernel->set_out_kernels(cur_out_kernels);
+    kernels->erase(find(kernels->begin(), kernels->end(), out_kernel));
+  }
+}
+
+void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
+  auto tensors_vec = cur_kernel->in_tensors();
+  for (auto in_kernel : cur_kernel->in_kernels()) {
+    lite::Tensor *cur_tensor = nullptr;
+    auto in_tensor = in_kernel->in_tensors()[0];
+    auto out_tensor = in_kernel->out_tensors()[0];
+    auto pre_kernel = in_kernel->in_kernels()[0];
+    for (size_t i = 0; i < pre_kernel->out_tensors().size(); i++) {
+      if (pre_kernel->out_tensors()[i] == in_tensor) {
+        cur_tensor = pre_kernel->out_tensors()[i];
+      }
+    }
+    for (size_t i = 0; i < tensors_vec.size(); i++) {
+      if (tensors_vec[i] == out_tensor) {
+        tensors_vec[i] = cur_tensor;
+      }
+    }
+  }
+  cur_kernel->set_in_tensors(tensors_vec);
+}
+
+void UpdatePostTensors(kernel::LiteKernel *cur_kernel) {
+  auto tensors_vec = cur_kernel->out_tensors();
+  for (auto out_kernel : cur_kernel->out_kernels()) {
+    auto in_tensor = out_kernel->in_tensors()[0];
+    auto out_tensor = out_kernel->out_tensors()[0];
+    auto post_kernel = out_kernel->out_kernels()[0];
+    lite::Tensor *cur_tensor = nullptr;
+    for (size_t i = 0; i < post_kernel->in_tensors().size(); i++) {
+      if (post_kernel->in_tensors()[i] == out_tensor) {
+        cur_tensor = post_kernel->in_tensors()[i];
+      }
+    }
+    for (size_t i = 0; i < tensors_vec.size(); i++) {
+      if (tensors_vec[i] == in_tensor) {
+        tensors_vec[i] = cur_tensor;
+      }
+    }
+  }
+  cur_kernel->set_out_tensors(tensors_vec);
+}
+
+int TransFormAxis(int axis) {
+  switch (axis) {
+    case 0:
+      return 0;
+    case 1:
+      return 2;
+    case 2:
+      return 3;
+    case 3:
+    case -1:
+      return 1;
+    default:
+      return -2;
+  }
+}
+
+int NPUFusionPass::AddFusion(kernel::LiteKernel *kernel) {
+  if (!CheckFusion(kernel)) {
+    return RET_OK;
+  }
+  UpdatePreTensors(kernel);
+  UpdatePostTensors(kernel);
+  UpdatePreKernels(kernel);
+  UpdatePostKernels(kernel);
+  return RET_OK;
+}
+
+int NPUFusionPass::ConcatFusion(kernel::LiteKernel *kernel) {
+  if (!CheckFusion(kernel)) {
+    return RET_OK;
+  }
+  UpdatePreTensors(kernel);
+  UpdatePostTensors(kernel);
+  UpdatePreKernels(kernel);
+  UpdatePostKernels(kernel);
+  auto concat_param = reinterpret_cast<ConcatParameter *>(kernel->op_parameter());
+  concat_param->axis_ = TransFormAxis(concat_param->axis_);
+  return RET_OK;
+}
+
+int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
+  if (kernel->out_kernels().empty()) {
+    return RET_OK;
+  }
+  if (!std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
+        return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw;
+      })) {
+    return RET_OK;
+  }
+  auto pre_kernel = kernel->in_kernels()[0];
+
+  auto pre_out_kernels = pre_kernel->out_kernels();
+  for (size_t i = 0; i < pre_out_kernels.size(); i++) {
+    if (pre_out_kernels[i] == kernel) {
+      pre_out_kernels.erase(pre_out_kernels.begin() + i);
+      break;
+    }
+  }
+  for (const auto &nc2nh : kernel->out_kernels()) {
+    for (const auto &post_kernel : nc2nh->out_kernels()) {
+      auto post_in_kernels = post_kernel->in_kernels();
+      for (size_t i = 0; i < post_in_kernels.size(); i++) {
+        if (post_in_kernels[i] == nc2nh) {
+          post_in_kernels[i] = pre_kernel;
+          break;
+        }
+      }
+      post_kernel->set_in_kernels(post_in_kernels);
+      pre_out_kernels.push_back(post_kernel);
+    }
+    kernels->erase(find(kernels->begin(), kernels->end(), nc2nh));
+  }
+  pre_kernel->set_out_kernels(pre_out_kernels);
+  kernels->erase(find(kernels->begin(), kernels->end(), kernel));
+  return RET_OK;
+}
+
+int NPUFusionPass::Fusion() {
+  for (auto kernel : *kernels) {
+    switch (kernel->Type()) {
+      case schema::PrimitiveType_Concat:
+        ConcatFusion(kernel);
+        continue;
+      case schema::PrimitiveType_Add:
+        AddFusion(kernel);
+        continue;
+      case schema::PrimitiveType_Nchw2Nhwc:
+        FormatFusion(kernel);
+        continue;
+      default:
+        continue;
+    }
+  }
+  return RET_OK;
+}
+}  // namespace mindspore::lite
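Note: TransFormAxis in npu_fusion_pass.cc above remaps a concat axis from NHWC coordinates to NCHW coordinates once the surrounding transposes have been fused away. A minimal standalone sketch of that mapping (plain C++, not part of the patch; the function name is ours):

#include <cassert>

// NHWC positions: N=0, H=1, W=2, C=3 (or -1); NCHW positions: N=0, C=1, H=2, W=3.
int TransFormAxisSketch(int axis) {
  switch (axis) {
    case 0: return 0;    // batch stays in front
    case 1: return 2;    // H shifts behind C
    case 2: return 3;    // W moves to the end
    case 3:
    case -1: return 1;   // channel moves up next to batch
    default: return -2;  // unexpected axis for a 4-D tensor
  }
}

int main() {
  assert(TransFormAxisSketch(3) == 1);   // concat over channels: NHWC axis 3 -> NCHW axis 1
  assert(TransFormAxisSketch(-1) == 1);  // a negative channel axis behaves the same
  assert(TransFormAxisSketch(1) == 2);   // concat over height
  return 0;
}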
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.h b/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.h
new file mode 100644
index 0000000000..53d77984c7
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.h
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
+#include <vector>
+#include "src/lite_kernel.h"
+#include "src/ops/primitive_c.h"
+namespace mindspore::lite {
+class NPUFusionPass {
+ public:
+  explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; }
+  ~NPUFusionPass() = default;
+  int Fusion();
+
+ protected:
+  int ConcatFusion(kernel::LiteKernel *kernel);
+  int AddFusion(kernel::LiteKernel *kernel);
+  int FormatFusion(kernel::LiteKernel *kernel);
+  void UpdatePreKernels(kernel::LiteKernel *kernel);
+  void UpdatePostKernels(kernel::LiteKernel *kernel);
+
+ private:
+  std::vector<kernel::LiteKernel *> *kernels;
+};
+}  // namespace mindspore::lite
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
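Note: the npu_manager.cc changes that follow replace the old build-a-test-OM-model probe with three gates: a Kirin chip check, an EMUI version check, and a HiAI DDK version check, the latter two compared as dotted version strings. A standalone sketch of the comparison semantics (segments compared numerically left to right, a missing segment counting as 0); this is our own defensive variant, not the patch's code verbatim:

#include <cassert>
#include <sstream>
#include <string>

int CompareVersionSketch(const std::string &v1, const std::string &v2) {
  std::istringstream iss1(v1), iss2(v2);
  std::string s1, s2;
  while (!iss1.eof() || !iss2.eof()) {
    std::getline(iss1, s1, '.');
    std::getline(iss2, s2, '.');
    if (s1.empty()) s1 = "0";  // the shorter version string is padded with zeros
    if (s2.empty()) s2 = "0";
    if (std::stoi(s1) > std::stoi(s2)) return 1;
    if (std::stoi(s1) < std::stoi(s2)) return -1;
  }
  return 0;
}

int main() {
  assert(CompareVersionSketch("100.330.010.011", "100.320.011.019") == 1);  // DDK gate
  assert(CompareVersionSketch("11.0", "11.0.0") == 0);                      // zero padding
  assert(CompareVersionSketch("10.1.0", "11.0.0") == -1);                   // EMUI too old
  return 0;
}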
"The current device NOT SUPPORT NPU."; + return false; } - return executor_path; } bool NPUManager::IsKirinChip() { @@ -96,86 +104,6 @@ bool NPUManager::IsKirinChip() { return false; } -bool WriteToOMFile(domi::ModelBufferData om_model_buff, const std::string &om_file_path) { - FILE *fp; - fp = fopen(om_file_path.c_str(), "wb"); - if (fp == nullptr) { - MS_LOG(ERROR) << om_file_path.c_str() << " open failed."; - return false; - } - - auto write_size = (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp); - if (write_size != om_model_buff.length) { - fclose(fp); - MS_LOG(ERROR) << "Write om file failed."; - return false; - } - fclose(fp); - return true; -} - -bool NPUManager::CheckOmBuildIr(const std::string &path) { - // build test om model - std::shared_ptr add_op(new (std::nothrow) hiai::op::Add("add")); - if (add_op == nullptr) { - MS_LOG(ERROR) << "new add_op failed."; - return false; - } - ge::TensorDesc desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT); - std::shared_ptr data = std::make_shared("data"); - data->update_input_desc_x(desc); - add_op->set_input_x1(*data); - add_op->set_input_x2(*data); - domi::HiaiIrBuild ir_build; - ge::Graph ir_graph("graph"); - std::vector inputs{*data, *data}; - std::vector outputs{*add_op}; - ir_graph.SetInputs(inputs).SetOutputs(outputs); - ge::Model om_model("test_model", "test_version"); - om_model.SetGraph(ir_graph); - - domi::ModelBufferData om_model_buff; - if (!ir_build.CreateModelBuff(om_model, om_model_buff)) { - MS_LOG(ERROR) << "Create model buffer failed."; - return false; - } - if (!ir_build.BuildIRModel(om_model, om_model_buff)) { - MS_LOG(ERROR) << "Build IR model failed."; - return false; - } - - // save test om model - remove(path.c_str()); - bool ret = WriteToOMFile(om_model_buff, path); - ir_build.ReleaseModelBuff(om_model_buff); - return ret; -} - -void NPUManager::CheckSupportNPU() { - is_npu_check_executor = true; - std::string path_string = GetExecutorPath(); - - std::string test_model_path = path_string + "/mindspore_lite_test_npu.om"; - std::ifstream ifs(test_model_path); - if (ifs.good() && ifs.is_open()) { - ifs.close(); - is_support_npu = true; - return; - } - if (!IsKirinChip()) { - MS_LOG(ERROR) << "The current device chip NOT SUPPORT NPU"; - is_support_npu = false; - return; - } - - if (!CheckOmBuildIr(test_model_path)) { - MS_LOG(ERROR) << "Build OM IR error."; - is_support_npu = false; - return; - } - is_support_npu = true; -} - int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) { hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size); if (buffer == nullptr) { @@ -188,33 +116,42 @@ int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &mode model_desc_.push_back(desc); mc_builder_->MemBufferDestroy(buffer); + model_map_.insert({model_name, index_}); index_++; return RET_OK; } -int NPUManager::InitClient() { - this->client_ = std::make_shared(); - if (this->client_ == nullptr) { - return RET_ERROR; - } - int ret = this->client_->Init(nullptr); - if (ret != hiai::AI_SUCCESS) { - return RET_ERROR; - } - mc_builder_ = std::make_shared(this->client_); - return RET_OK; -} - int NPUManager::LoadOMModel() { - int ret = this->client_->Load(model_desc_); - if (ret != hiai::AI_SUCCESS) { - MS_LOG(ERROR) << "Client load model failed." 
+  for (int i = 0; i < index_ / MAX_MODEL_NUM + 1; i++) {
+    auto client = std::make_shared<hiai::AiModelMngerClient>();
+    if (client == nullptr) {
+      MS_LOG(ERROR) << "NPU client is nullptr.";
+      return RET_ERROR;
+    }
+    int ret = client->Init(nullptr);
+    if (ret != hiai::AI_SUCCESS) {
+      MS_LOG(ERROR) << "NPU client init failed. code is " << ret;
+      return RET_ERROR;
+    }
+    mc_builder_ = std::make_shared<hiai::AiModelBuilder>(client);
+
+    vector<std::shared_ptr<hiai::AiModelDescription>> desc(model_desc_.begin() + i * MAX_MODEL_NUM,
+                                                           ((i + 1) * MAX_MODEL_NUM > index_)
+                                                             ? model_desc_.begin() + index_
+                                                             : model_desc_.begin() + (i + 1) * MAX_MODEL_NUM);
+    ret = client->Load(desc);
+    if (ret != hiai::AI_SUCCESS) {
+      MS_LOG(ERROR) << "Client load model failed." << ret;
+      return RET_ERROR;
+    }
+    clients_.push_back(client);
   }
   return RET_OK;
 }
 
-std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient() { return client_; }
+std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
+  return clients_[model_map_[model_name] / MAX_MODEL_NUM];
+}
 
-int NPUManager::index() { return index_; }
+int NPUManager::index() const { return index_; }
 }  // namespace mindspore::lite
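Note: LoadOMModel above appears to work around a per-client model limit by loading at most MAX_MODEL_NUM (20) model descriptors into each AiModelMngerClient and spreading the rest over further clients; GetClient then maps a model name back to its client through model_map_. A standalone sketch of the slicing arithmetic (values are illustrative):

#include <algorithm>
#include <cassert>

int main() {
  const int kMaxModelNum = 20;  // mirrors MAX_MODEL_NUM in the patch
  const int index = 45;         // e.g. 45 registered subgraph models
  const int clients = index / kMaxModelNum + 1;
  assert(clients == 3);  // descriptor ranges: 0-19, 20-39, 40-44
  for (int i = 0; i < clients; ++i) {
    const int begin = i * kMaxModelNum;
    const int end = std::min((i + 1) * kMaxModelNum, index);
    assert(begin <= end && end <= index);  // client i loads descriptors [begin, end)
  }
  assert(41 / kMaxModelNum == 2);  // model 41 is served by the third client
  return 0;
}

One edge case worth noting: when index_ is an exact multiple of 20, the final slice computed by the patch is empty.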
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_manager.h b/mindspore/lite/src/runtime/agent/npu/npu_manager.h
index 010e642ded..e825eca3c3 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_manager.h
+++ b/mindspore/lite/src/runtime/agent/npu/npu_manager.h
@@ -14,15 +14,21 @@
  * limitations under the License.
  */
 
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
 #include <string>
 #include <memory>
 #include <vector>
+#include <set>
+#include <unordered_map>
+#include "schema/model_generated.h"
 #include "include/HiAiModelManagerService.h"
 
 namespace mindspore::lite {
-
+static std::set<schema::PrimitiveType> npu_trans_nodes = {
+  schema::PrimitiveType_Conv2D,          schema::PrimitiveType_DeConv2D,
+  schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D,
+  schema::PrimitiveType_Resize,          schema::PrimitiveType_Pooling};
 class NPUManager {
  public:
   static NPUManager *GetInstance() {
@@ -32,8 +38,6 @@ class NPUManager {
 
   bool IsSupportNPU();
 
-  int InitClient();
-
   // provide to subgraph to add model.
   int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency);
 
   int LoadOMModel();
 
   // provide to executor.
-  std::shared_ptr<hiai::AiModelMngerClient> GetClient();
+  std::shared_ptr<hiai::AiModelMngerClient> GetClient(const std::string &model_name);
 
-  int index();
+  int index() const;
 
  private:
-  void CheckSupportNPU();
-
   bool IsKirinChip();
 
-  bool CheckOmBuildIr(const std::string &path);
+  bool CheckEMUIVersion();
 
-  std::string GetExecutorPath();
+  bool CheckDDKVersion();
+
+  int CompareVersion(const std::string &version1, const std::string &version2);
 
  private:
   int index_ = 0;
@@ -61,12 +65,14 @@ class NPUManager {
 
   bool is_support_npu = false;
 
-  std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
+  std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
 
   std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_;
 
   std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr;
+
+  std::unordered_map<std::string, int> model_map_;
 };
 }  // namespace mindspore::lite
 
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_pass_utils.cc b/mindspore/lite/src/runtime/agent/npu/npu_pass_utils.cc
new file mode 100644
index 0000000000..4daaff3c32
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/npu_pass_utils.cc
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/kernel_registry.h"
+#include "src/ops/nhwc2nchw.h"
+#include "src/ops/nchw2nhwc.h"
+#include "src/runtime/agent/npu/npu_pass_utils.h"
+namespace mindspore::lite {
+using kernel::KERNEL_ARCH::kCPU;
+using kernel::KERNEL_ARCH::kNPU;
+PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
+  flatbuffers::FlatBufferBuilder fbb(1024);
+  auto val_offset = schema::CreateNchw2Nhwc(fbb);
+  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nchw2Nhwc, val_offset.o);
+  fbb.Finish(prim_offset);
+  auto buf = fbb.GetBufferPointer();
+  if (buf == nullptr) {
+    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
+    fbb.Clear();
+    return nullptr;
+  }
+  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
+  if (primitive_buf == nullptr) {
+    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
+    fbb.Clear();
+    return nullptr;
+  }
+  memcpy(primitive_buf, buf, fbb.GetSize());
+  auto *primitive = PrimitiveC::NewPrimitiveC<Nchw2Nhwc>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
+  free(primitive_buf);
+  fbb.Clear();
+  return primitive;
+}
+
+PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
+  flatbuffers::FlatBufferBuilder fbb(1024);
+  auto val_offset = schema::CreateNhwc2Nchw(fbb);
+  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nhwc2Nchw, val_offset.o);
+  fbb.Finish(prim_offset);
+  auto buf = fbb.GetBufferPointer();
+  if (buf == nullptr) {
+    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
+    fbb.Clear();
+    return nullptr;
+  }
+  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
+  if (primitive_buf == nullptr) {
+    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
+    fbb.Clear();
+    return nullptr;
+  }
+  memcpy(primitive_buf, buf, fbb.GetSize());
+  auto *primitive = PrimitiveC::NewPrimitiveC<Nhwc2Nchw>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
+  free(primitive_buf);
+  fbb.Clear();
+  return primitive;
+}
+
+kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
+                                                        const std::vector<Tensor *> &out_tensors,
+                                                        const InnerContext *ctx, const std::string &name) {
+  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nchw2Nhwc};
+  auto nchw2nhwc_primitive = CreateNchw2NhwcPrimitive();
+  auto *nchw2nhwc_kernel =
+    KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nchw2nhwc_primitive, ctx, key);
+  nchw2nhwc_kernel->set_name(name);
+  return nchw2nhwc_kernel;
+}
+
+kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
+                                                        const std::vector<Tensor *> &out_tensors,
+                                                        const InnerContext *ctx, const std::string &name) {
+  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nhwc2Nchw};
+  auto nhwc2nchw_primitive = CreateNhwc2NchwPrimitive();
+  auto *nhwc2nchw_kernel =
+    KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nhwc2nchw_primitive, ctx, key);
+  nhwc2nchw_kernel->set_name(name);
+  return nhwc2nchw_kernel;
+}
+
+void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
+                                const std::vector<kernel::LiteKernel *> &out_kernels,
+                                const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors) {
+  kernel->set_in_tensors(in_tensors);
+  kernel->set_out_tensors(out_tensors);
+  kernel->set_in_kernels(in_kernels);
+  kernel->set_out_kernels(out_kernels);
+}
+}  // namespace mindspore::lite
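Note: both Create*Primitive helpers above follow the same flatbuffers pattern: serialize the op into the builder, copy the finished buffer into memory the caller owns, and only then clear the builder. A standalone sketch of why the copy matters, with a plain stand-in for the builder type (not the flatbuffers API):

#include <cstddef>
#include <cstdlib>
#include <cstring>

struct FakeBuilder {  // stand-in for flatbuffers::FlatBufferBuilder
  unsigned char bytes[16] = {1, 2, 3, 4};
  unsigned char *GetBufferPointer() { return bytes; }
  std::size_t GetSize() const { return sizeof(bytes); }
  void Clear() { std::memset(bytes, 0, sizeof(bytes)); }  // invalidates the old view
};

int main() {
  FakeBuilder fbb;
  auto *primitive_buf = reinterpret_cast<char *>(std::malloc(fbb.GetSize()));
  if (primitive_buf == nullptr) return 1;
  std::memcpy(primitive_buf, fbb.GetBufferPointer(), fbb.GetSize());
  fbb.Clear();  // safe: the primitive would be parsed from our own copy
  int first = primitive_buf[0];
  std::free(primitive_buf);
  return first == 1 ? 0 : 1;
}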
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_pass_utils.h b/mindspore/lite/src/runtime/agent/npu/npu_pass_utils.h
new file mode 100644
index 0000000000..c1ae241abe
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/npu_pass_utils.h
@@ -0,0 +1,44 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
+#include <vector>
+#include <string>
+#include "src/ops/primitive_c.h"
+#include "src/lite_kernel.h"
+namespace mindspore::lite {
+class NPUPassUtils {
+ public:
+  static kernel::LiteKernel *CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
+                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
+                                                   const std::string &name);
+
+  static kernel::LiteKernel *CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
+                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
+                                                   const std::string &name);
+
+  static void UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
+                           const std::vector<kernel::LiteKernel *> &out_kernels,
+                           const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors);
+
+ private:
+  static PrimitiveC *CreateNchw2NhwcPrimitive();
+
+  static PrimitiveC *CreateNhwc2NchwPrimitive();
+};
+}  // namespace mindspore::lite
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/npu_transform_pass.cc
new file mode 100644
index 0000000000..232d7d0c6c
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/npu_transform_pass.cc
@@ -0,0 +1,201 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/agent/npu/npu_transform_pass.h"
+#include <vector>
+#include "src/lite_kernel.h"
+#include "src/runtime/agent/npu/npu_manager.h"
+#include "src/runtime/agent/npu/npu_pass_utils.h"
+namespace mindspore::lite {
+using kernel::KERNEL_ARCH::kCPU;
+using kernel::KERNEL_ARCH::kNPU;
+int NPUTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                                    kernel::LiteKernel *after_kernel) {
+  std::vector<kernel::LiteKernel *> out_kernels;
+
+  for (auto out_kernel : kernel->out_kernels()) {
+    if (out_kernel == after_kernel) {
+      out_kernels.push_back(trans_kernel);
+    } else {
+      out_kernels.push_back(out_kernel);
+    }
+  }
+  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors());
+  return RET_OK;
+}
+
+int NPUTransformPass::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                                      kernel::LiteKernel *before_kernel) {
+  std::vector<Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]};
+  for (int i = 1; i < kernel->in_tensors().size(); i++) {
+    cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]);
+  }
+  std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel};
+  for (int i = 0; i < kernel->in_kernels().size(); i++) {
+    auto in_kernel = kernel->in_kernels()[i];
+    if (in_kernel != kernel) {
+      cur_in_kernels.push_back(in_kernel);
+    }
+  }
+  NPUPassUtils::UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors,
+                             kernel->out_tensors());
+  return RET_OK;
+}
+
+int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
+                                    std::vector<kernel::LiteKernel *> *all_kernels,
+                                    std::vector<Tensor *> *all_tensors) {
+  auto kernel = *it;
+  bool is_input_kernel = kernel->in_kernels().empty();
+  if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU ||
+      npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) {
+    kernel::LiteKernel *before_kernel = nullptr;
+    if (!is_input_kernel) {
+      before_kernel = kernel->in_kernels()[0];
+    }
+    // Create the pre transform kernel's out tensor (NHWC shape re-ordered to NCHW).
+    std::vector<int> shapes{kernel->in_tensors()[0]->shape()[0], kernel->in_tensors()[0]->shape()[3],
+                            kernel->in_tensors()[0]->shape()[1], kernel->in_tensors()[0]->shape()[2]};
+    auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), shapes, schema::Format_NCHW, Tensor::VAR);
+    std::vector<Tensor *> pre_trans_out_tensors = {tensor};
+    all_tensors->push_back(pre_trans_out_tensors[0]);
+    // Replace the output tensor of the previous node.
+    auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++);
+    auto *pre_trans_kernel =
+      NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name);
+    // Insert the Nhwc2Nchw kernel in front of the current kernel in the queue.
+    all_kernels->push_back(pre_trans_kernel);
+    // Replace the output kernel of the previous node.
+    std::vector<kernel::LiteKernel *> pre_trans_in_kernel;
+    if (is_input_kernel) {
+      pre_trans_in_kernel = {};
+    } else {
+      pre_trans_in_kernel = {before_kernel};
+    }
+    NPUPassUtils::UpdateKernel(pre_trans_kernel, pre_trans_in_kernel, {kernel}, {kernel->in_tensors()[0]},
+                               pre_trans_out_tensors);
+
+    if (before_kernel != nullptr) {
+      UpdateNH2NCTransNodePreKernel(before_kernel, pre_trans_kernel, kernel);
+    }
+    UpdateNH2NCTransNodeAfterKernel(kernel, pre_trans_kernel, before_kernel);
+  }
+  return RET_OK;
+}
+
+int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
+                                     std::vector<kernel::LiteKernel *> *all_kernels,
+                                     std::vector<Tensor *> *all_tensors) {
+  auto kernel = *it;
+  // A single output may be referenced by multiple kernels.
+  for (int i = 0; i < kernel->out_kernels().size(); i++) {
+    auto next_kernel = kernel->out_kernels().at(i);
+    if (next_kernel->desc().arch == kNPU && npu_trans_nodes.find(next_kernel->Type()) != npu_trans_nodes.end()) {
+      continue;
+    }
+    // Create the post transform kernel's out tensor (back in NHWC format).
+    auto shapes = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[1],
+                   kernel->out_tensors()[0]->shape()[2], kernel->out_tensors()[0]->shape()[3]};
+    auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), shapes, schema::Format_NHWC, Tensor::VAR);
+    std::vector<Tensor *> post_trans_out_tensors = {tensor};
+    all_tensors->push_back(post_trans_out_tensors[0]);
+    // Use the output tensor of the current node as the input tensor of the post-conversion operator.
+    auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++);
+    auto *post_trans_kernel =
+      NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors, context, name);
+    // Replace the input tensor of the next node.
+    NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {next_kernel}, kernel->out_tensors(),
+                               post_trans_out_tensors);
+    // Insert directly at the back; this does not affect the topological order.
+    all_kernels->push_back(post_trans_kernel);
+    UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel);
+    UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel);
+  }
+  return RET_OK;
+}
+
+int NPUTransformPass::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                                    kernel::LiteKernel *next_kernel) {
+  std::vector<kernel::LiteKernel *> cur_out_kernels;
+  for (auto out_kernel : kernel->out_kernels()) {
+    if (out_kernel == next_kernel) {
+      cur_out_kernels.push_back(trans_kernel);
+    } else {
+      cur_out_kernels.push_back(out_kernel);
+    }
+  }
+  auto kernel_out_tensor = kernel->out_tensors()[0];
+  // Switch the current kernel's output tensor to NCHW: re-order the shape and set the format.
+  std::vector<int> kernel_out_new_shapes = {kernel_out_tensor->shape()[0], kernel_out_tensor->shape()[3],
+                                            kernel_out_tensor->shape()[1], kernel_out_tensor->shape()[2]};
+  kernel_out_tensor->set_format(schema::Format_NCHW);
+  kernel_out_tensor->set_shape(kernel_out_new_shapes);
+  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor});
+  return RET_OK;
+}
+
+int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                                      kernel::LiteKernel *next_kernel) {
+  std::vector<Tensor *> next_in_tensors;
+  for (auto next_in_tensor : next_kernel->in_tensors()) {
+    if (next_in_tensor != kernel->out_tensors()[0]) {
+      next_in_tensors.push_back(next_in_tensor);
+    } else {
+      next_in_tensors.push_back(trans_kernel->out_tensors()[0]);
+    }
+  }
+  next_kernel->set_in_tensors(next_in_tensors);
+  std::vector<kernel::LiteKernel *> next_in_kernels;
+  for (auto in_kernel : next_kernel->in_kernels()) {
+    if (in_kernel == kernel) {
+      next_in_kernels.push_back(trans_kernel);
+    } else {
+      next_in_kernels.push_back(in_kernel);
+    }
+  }
+  NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors,
+                             next_kernel->out_tensors());
+
+  return RET_OK;
+}
+
+int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
+                                          std::vector<Tensor *> *all_tensors) {
+  if (context->IsNpuEnabled()) {
+    std::vector<kernel::LiteKernel *> new_kernels;
+
+    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
+      auto kernel = *it;
+      if (kernel->desc().arch != kNPU) {
+        new_kernels.push_back(kernel);
+        continue;
+      }
+      if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
+        InsertPreNode(context, it, &new_kernels, all_tensors);
+        new_kernels.push_back(kernel);
+        InsertPostNode(context, it, &new_kernels, all_tensors);
+      } else {
+        new_kernels.push_back(kernel);
+      }
+    }
+    all_kernels->clear();
+    for (int i = 0; i < new_kernels.size(); i++) {
+      all_kernels->push_back(new_kernels[i]);
+    }
+  }
+  return RET_OK;
+}
+
+}  // namespace mindspore::lite
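Note: the transform pass above always builds the transpose's NCHW tensor by re-ordering the NHWC shape, {n, h, w, c} -> {n, c, h, w}, the same index shuffle used for the inserted pre-transpose tensor and for the producer's output tensor. A standalone sketch (helper name is ours):

#include <cassert>
#include <vector>

std::vector<int> Nhwc2NchwShape(const std::vector<int> &s) {
  return {s[0], s[3], s[1], s[2]};  // same index shuffle as the patch
}

int main() {
  std::vector<int> nhwc{1, 224, 224, 3};
  assert((Nhwc2NchwShape(nhwc) == std::vector<int>{1, 3, 224, 224}));
  return 0;
}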
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_transform_pass.h b/mindspore/lite/src/runtime/agent/npu/npu_transform_pass.h
new file mode 100644
index 0000000000..34253d29ac
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/npu_transform_pass.h
@@ -0,0 +1,51 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
+#include <vector>
+#include "src/lite_kernel.h"
+#include "src/ops/primitive_c.h"
+namespace mindspore::lite {
+class NPUTransformPass {
+ public:
+  int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
+                          std::vector<Tensor *> *all_tensors);
+
+ private:
+  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                    kernel::LiteKernel *after_kernel);
+
+  int UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                      kernel::LiteKernel *before_kernel);
+
+  int UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                    kernel::LiteKernel *after_kernel);
+
+  int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
+                                      kernel::LiteKernel *next_kernel);
+
+  int InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
+                    std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);
+
+  int InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
+                     std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);
+
+ private:
+  int total = 0;
+};
+}  // namespace mindspore::lite
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
index 2c1f06367a..57104e214e 100644
--- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
+++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
@@ -24,7 +24,6 @@
 #include "include/graph/model.h"
 #include "include/hiai_ir_build.h"
 #include "include/HiAiModelManagerType.h"
-#include "include/context.h"
 #include "include/version.h"
 #include "src/common/utils.h"
 #include "src/runtime/agent/npu/npu_converter_utils.h"
@@ -34,10 +33,6 @@ namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 
-std::set<schema::PrimitiveType> trans_nodes = {schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D,
-                                               schema::PrimitiveType_DepthwiseConv2D,
-                                               schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_Resize};
-
 domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
   ge::Graph graph("NPUGraph");
 
@@ -75,8 +70,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
 }
 
 int SubGraphNpuKernel::Run() {
-  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)
-    ->Run(in_tensors_, out_tensors_, nodes_, inputs_nhwc2nchw_, outputs_nchw2nhwc_);
+  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, nodes_);
 }
 
 int SubGraphNpuKernel::BuildNPUInputOp() {
@@ -88,21 +82,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
     if (IsSubGraphInputTensor(in_tensor)) {
       auto tensor_name = node->name() + "_" + std::to_string(count++);
       hiai::op::Data *data;
-      if (trans_nodes.find(node->Type()) != trans_nodes.end()) {
-        auto shape = in_tensor->shape();
-        data = new (std::nothrow) hiai::op::Data(tensor_name);
-        if (data == nullptr) {
-          MS_LOG(ERROR) << "New data failed.";
-          return RET_ERROR;
-        }
-        ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({shape[0], shape[3], shape[1], shape[2]}),
-                                   ge::FORMAT_NCHW, lite::ConverterToNPUDataType(in_tensor->data_type()));
-        data->update_input_desc_x(tensor_desc);
-        inputs_nhwc2nchw_.push_back(true);
-      } else {
-        data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
-        inputs_nhwc2nchw_.push_back(false);
-      }
+      data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
       subgraph_input_op_.push_back(*data);
       node_input_op.push_back(data);
       continue;
@@ -132,7 +112,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
 
       // weight tensor
       if (is_weight_tensor) {
-        if (trans_nodes.find(node->Type()) == trans_nodes.end()) {
+        if (lite::npu_trans_nodes.find(node->Type()) == lite::npu_trans_nodes.end()) {
           auto name = node->name() + "_" + std::to_string(count++);
           auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++));
           if (weight_const == nullptr) {
@@ -162,11 +142,6 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::LiteKernel *> &nodes) {
     ops.push_back(reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp());
-    if (trans_nodes.find(schema::PrimitiveType(nodes[i]->GetPrimitive()->Type())) != trans_nodes.end()) {
-      outputs_nchw2nhwc_.push_back(true);
-    } else {
-      outputs_nchw2nhwc_.push_back(false);
-    }
   }
   return ops;
 }
diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h
index 031bf232b1..2ed220a561 100644
--- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h
+++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h
@@ -69,10 +69,6 @@ class SubGraphNpuKernel : public SubGraphKernel {
   std::string GetOMModelName();
 
  private:
-  std::vector<bool> inputs_nhwc2nchw_;
-
-  std::vector<bool> outputs_nchw2nhwc_;
-
   domi::ModelBufferData *model_buffer_data_;
 
   std::vector<ge::Operator> subgraph_input_op_;
diff --git a/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.cc b/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.cc
index 0d3b5e1ca6..07e948ad85 100644
--- a/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.cc
@@ -16,6 +16,7 @@
 
 #include "src/runtime/kernel/npu/convolution_base_npu.h"
 #include "src/runtime/agent/npu/npu_converter_utils.h"
+#include "nnacl/pack.h"
 
 namespace mindspore::kernel {
 ConvolutionBaseNPUKernel::~ConvolutionBaseNPUKernel() {
@@ -39,14 +40,27 @@ int ConvolutionBaseNPUKernel::InitWeightBiasConst(const std::vector<lite::Tensor *> &inputs
     return RET_ERROR;
   }
-  auto weight_shape = inputs[1]->shape();
-  inputs[1]->set_shape({weight_shape[0], weight_shape[3], weight_shape[1], weight_shape[2]});
-  inputs[1]->set_format(schema::Format_NCHW);
-  auto weight_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]);
-  weight_->set_attr_value(weight_tensor);
+  auto w_shape = inputs[1]->shape();
+  auto nhwc_data = inputs[1]->data_c();
+  auto nchw_data = reinterpret_cast<float *>(malloc(inputs[1]->ElementsNum() * sizeof(float)));
+  if (nchw_data == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  PackNHWCToNCHWFp32(nhwc_data, nchw_data, w_shape[0], w_shape[1] * w_shape[2], w_shape[3]);
 
-  inputs[1]->set_shape(weight_shape);
-  inputs[1]->set_format(schema::Format_NHWC);
+  std::shared_ptr<ge::Tensor> weight_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
+  if (weight_tensor == nullptr) {
+    MS_LOG(ERROR) << "new weight_tensor failed.";
+    return RET_ERROR;
+  }
+  ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({w_shape[0], w_shape[3], w_shape[1], w_shape[2]}),
+                             ge::FORMAT_NCHW, lite::ConverterToNPUDataType(inputs[1]->data_type()));
+  weight_tensor->SetTensorDesc(tensor_desc);
+  weight_tensor->SetData(reinterpret_cast<const uint8_t *>(nchw_data), inputs[1]->Size());
+
+  weight_->set_attr_value(weight_tensor);
+  free(nchw_data);
 
   if (inputs.size() >= 3) {
     bias_ = new (std::nothrow) hiai::op::Const(name_ + "_b");
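Note: InitWeightBiasConst above now repacks the NHWC weight data into a scratch NCHW buffer instead of temporarily mutating the tensor's shape and format. A standalone sketch of what PackNHWCToNCHWFp32 computes, assuming the usual nnacl convention plane = H * W, src laid out [N][plane][C] and dst [N][C][plane] (the helper name here is ours):

#include <cassert>
#include <vector>

void PackNhwcToNchwSketch(const float *src, float *dst, int batch, int plane, int channel) {
  for (int n = 0; n < batch; ++n)
    for (int p = 0; p < plane; ++p)
      for (int c = 0; c < channel; ++c)
        dst[(n * channel + c) * plane + p] = src[(n * plane + p) * channel + c];
}

int main() {
  // A 1x2x2x2 weight, NHWC values 0..7; the two channels end up contiguous in NCHW.
  std::vector<float> nhwc{0, 1, 2, 3, 4, 5, 6, 7}, nchw(8);
  PackNhwcToNchwSketch(nhwc.data(), nchw.data(), /*batch=*/1, /*plane=*/4, /*channel=*/2);
  assert((nchw == std::vector<float>{0, 2, 4, 6, 1, 3, 5, 7}));
  return 0;
}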
diff --git a/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.h b/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.h
index 9bd60a8074..88b6bd5aba 100644
--- a/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.h
@@ -17,17 +17,18 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CONVOLUTION_BASE_NPU_H_
 
 #include <vector>
+#include <memory>
 #include "include/graph/op/all_ops.h"
 #include "src/runtime/kernel/npu/transpose_base_npu.h"
 #include "nnacl/conv_parameter.h"
 
 namespace mindspore::kernel {
-class ConvolutionBaseNPUKernel : public TransposeBaseNPUKernel {
+class ConvolutionBaseNPUKernel : public NPUKernel {
  public:
   ConvolutionBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                            const mindspore::lite::PrimitiveC *primitive)
-      : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ConvolutionBaseNPUKernel() override;
 
  protected:
diff --git a/mindspore/lite/src/runtime/kernel/npu/convolution_depthwise_npu.cc b/mindspore/lite/src/runtime/kernel/npu/convolution_depthwise_npu.cc
index 205b71f4ef..6334f9613f 100644
--- a/mindspore/lite/src/runtime/kernel/npu/convolution_depthwise_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/convolution_depthwise_npu.cc
@@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D;
 namespace mindspore::kernel {
 int ConvolutionDepthwiseNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
-  return RET_ERROR;
+  return RET_OK;
 }
 
 int ConvolutionDepthwiseNPUKernel::SetConvDwParam() {
@@ -49,19 +49,13 @@ int ConvolutionDepthwiseNPUKernel::SetConvDwParam() {
 int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                                 const std::vector<lite::Tensor *> &outputs,
                                                 const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   // set conv attr param
   conv_dw_ = new (std::nothrow) hiai::op::ConvolutionDepthwise(name_ + "_conv_depthwise");
   if (conv_dw_ == nullptr) {
     MS_LOG(ERROR) << "New convolution depthwise operator for op " << name_ << " failed.";
     return RET_ERROR;
   }
-  ret = SetConvDwParam();
+  auto ret = SetConvDwParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set npu op parameter for convolution depthwise op " << name_ << " failed.";
     return RET_ERROR;
@@ -76,7 +70,7 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
     conv_dw_->set_input_bias(*bias_);
   }
-  conv_dw_->set_input_x(*pre_trans_);
+  conv_dw_->set_input_x(*npu_inputs[0]);
 
   if (conv_param_->act_type_ != ActType_No) {
     ret = SetActivation(conv_dw_, conv_param_->act_type_);
@@ -85,21 +79,17 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
       return RET_ERROR;
     }
   }
+  return RET_OK;
+}
 
+ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() {
   if (conv_param_->act_type_ == ActType_No) {
-    ret = SetPostTranspose(conv_dw_);
+    return conv_dw_;
   } else {
-    ret = SetPostTranspose(act_);
-  }
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
+    return act_;
   }
-  return RET_OK;
 }
 
-ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() { return post_trans_; }
-
 ConvolutionDepthwiseNPUKernel::~ConvolutionDepthwiseNPUKernel() {
   if (conv_dw_ != nullptr) {
     delete conv_dw_;
diff --git a/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc b/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc
index 7689382602..3126b13286 100644
--- a/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc
@@ -24,7 +24,7 @@ using mindspore::schema::PrimitiveType_Conv2D;
 namespace mindspore::kernel {
 int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
-  return RET_ERROR;
+  return RET_OK;
 }
 
 int ConvolutionNPUKernel::SetConvParam() {
@@ -49,19 +49,13 @@ int ConvolutionNPUKernel::SetConvParam() {
 int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                        const std::vector<lite::Tensor *> &outputs,
                                        const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   // set conv attr param
   conv_ = new (std::nothrow) hiai::op::Convolution(name_ + "_conv");
   if (conv_ == nullptr) {
     MS_LOG(ERROR) << "New convolution operator for convolution op " << name_ << " failed.";
     return RET_ERROR;
   }
-  ret = SetConvParam();
+  auto ret = SetConvParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed.";
     return RET_ERROR;
@@ -76,7 +70,7 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs
   if (inputs.size() == 3) {
     conv_->set_input_bias(*bias_);
   }
-  conv_->set_input_x(*pre_trans_);
+  conv_->set_input_x(*npu_inputs[0]);
 
   if (conv_param_->act_type_ != ActType_No) {
     ret = SetActivation(conv_, conv_param_->act_type_);
@@ -85,21 +79,17 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs
       return RET_ERROR;
     }
   }
+  return RET_OK;
+}
 
+ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() {
   if (conv_param_->act_type_ == ActType_No) {
-    ret = SetPostTranspose(conv_);
+    return conv_;
   } else {
-    ret = SetPostTranspose(act_);
-  }
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
+    return act_;
   }
-  return RET_OK;
 }
 
-ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() { return post_trans_; }
-
 ConvolutionNPUKernel::~ConvolutionNPUKernel() {
   if (conv_ != nullptr) {
     delete conv_;
diff --git a/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc b/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc
index e23c88261f..25d6b4c301 100644
--- a/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc
@@ -62,23 +62,17 @@ int PoolingNPUKernel::SetPoolingParam() {
 int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs,
                                    const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   pooling_ = new (std::nothrow) hiai::op::PoolingD(name_ + "_pooling");
   if (pooling_ == nullptr) {
     MS_LOG(ERROR) << "New pooling npu operator for op " << name_ << " failed.";
     return RET_ERROR;
   }
-  ret = SetPoolingParam();
+  auto ret = SetPoolingParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed.";
     return RET_ERROR;
   }
-  pooling_->set_input_x(*pre_trans_);
+  pooling_->set_input_x(*npu_inputs[0]);
 
   if (pooling_param_->act_type_ != ActType_No) {
     ret = SetActivation(pooling_, pooling_param_->act_type_);
@@ -87,21 +81,17 @@ int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
       return RET_ERROR;
     }
   }
+  return RET_OK;
+}
 
+ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() {
   if (pooling_param_->act_type_ == ActType_No) {
-    ret = SetPostTranspose(pooling_);
+    return pooling_;
   } else {
-    ret = SetPostTranspose(act_);
+    return act_;
   }
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
 }
 
-ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() { return post_trans_; }
-
 PoolingNPUKernel::~PoolingNPUKernel() {
   if (pooling_ != nullptr) {
     delete pooling_;
diff --git a/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc b/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc
index 17bcb35fb8..b478e31b25 100644
--- a/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc
@@ -36,12 +36,6 @@ int ResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const
 int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                   const std::vector<lite::Tensor *> &outputs,
                                   const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr sizeTensor = std::make_shared<ge::Tensor>(sizeTensorDesc);
   vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)};
@@ -55,7 +49,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
       return RET_ERROR;
     }
     op->set_attr_align_corners(align_corners_);
-    op->set_input_x(*pre_trans_);
+    op->set_input_x(*npu_inputs[0]);
     op->set_input_size(*out_size);
     op->set_attr_half_pixel_centers(preserve_aspect_ratio_);
     op_ = op;
@@ -66,21 +60,14 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
       return RET_ERROR;
     }
     op->set_attr_align_corners(align_corners_);
-    op->set_input_x(*pre_trans_);
+    op->set_input_x(*npu_inputs[0]);
     op->set_input_size(*out_size);
     op_ = op;
   }
-
-  ret = SetPostTranspose(op_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   return RET_OK;
 }
 
-ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->post_trans_; }
+ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->op_; }
 
 ResizeNPUKernel::~ResizeNPUKernel() {
   if (op_ != nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/npu/resize_npu.h b/mindspore/lite/src/runtime/kernel/npu/resize_npu.h
index 5077ac58e4..726d8ef655 100644
--- a/mindspore/lite/src/runtime/kernel/npu/resize_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/resize_npu.h
@@ -24,12 +24,12 @@
 #include "include/graph/op/all_ops.h"
 #include "src/runtime/kernel/npu/transpose_base_npu.h"
 namespace mindspore::kernel {
-class ResizeNPUKernel : public TransposeBaseNPUKernel {
+class ResizeNPUKernel : public NPUKernel {
  public:
   ResizeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
-      : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) {
+      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
     auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter);
     method_ = resize_parameter->method_;
     new_height_ = resize_parameter->new_height_;
diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc
index 527f4ff2fb..3282708f1c 100644
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -33,6 +33,8 @@
 #if SUPPORT_NPU
 #include "src/runtime/agent/npu/subgraph_npu_kernel.h"
 #include "src/runtime/agent/npu/npu_manager.h"
+#include "src/runtime/agent/npu/npu_transform_pass.h"
+#include "src/runtime/agent/npu/npu_fusion_pass.h"
 #endif
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kCPU;
@@ -63,6 +65,11 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
     return ret;
   }
   FindAllInoutKernels(*dst_kernels);
+  ret = RunPass(dst_kernels);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Schedule run pass failed.";
+    return ret;
+  }
   ret = ConstructSubGraphs(dst_kernels);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConstructSubGraphs failed.";
@@ -514,4 +521,25 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &kernels) {
     kernel->FindInoutKernels(kernels);
   }
 }
+
+int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
+  int ret = RET_OK;
+#if SUPPORT_NPU
+  auto transform_pass = new NPUTransformPass;
+  ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_);
+  delete transform_pass;
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run npu format transform pass failed.";
+    return ret;
+  }
+  auto fusion_pass = new NPUFusionPass(dst_kernels);
+  ret = fusion_pass->Fusion();
+  delete fusion_pass;
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run npu fusion pass failed.";
+    return ret;
+  }
+#endif
+  return ret;
+}
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/src/scheduler.h b/mindspore/lite/src/scheduler.h
index 0ef4f87783..3f9accf6b1 100644
--- a/mindspore/lite/src/scheduler.h
+++ b/mindspore/lite/src/scheduler.h
@@ -77,6 +77,8 @@ class Scheduler {
 
   static kernel::SubGraphType GetKernelSubGraphType(const kernel::LiteKernel *kernel);
 
+  int RunPass(std::vector<kernel::LiteKernel *> *dst_kernels);
+
  protected:
   const InnerContext *context_ = nullptr;
   Model *src_model_ = nullptr;
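Note: RunPass runs the NPU passes in a fixed order: the transform pass first inserts Nhwc2Nchw/Nchw2Nhwc kernels around NPU ops, and only then can the fusion pass fold away the pairs that cancel out. A standalone sketch of a leak-proof variant of the same control flow using std::unique_ptr instead of manual new/delete (stub types, C++14 assumed for make_unique; not the patch's code):

#include <memory>

struct TransformPassStub { int FormatTransformPass() { return 0; } };
struct FusionPassStub { int Fusion() { return 0; } };

int RunPassSketch() {
  auto transform_pass = std::make_unique<TransformPassStub>();
  if (transform_pass->FormatTransformPass() != 0) return -1;  // no leak on early return
  auto fusion_pass = std::make_unique<FusionPassStub>();
  if (fusion_pass->Fusion() != 0) return -1;                  // no leak on early return
  return 0;
}

int main() { return RunPassSketch(); }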