| @@ -95,6 +95,8 @@ class LiteKernel { | |||
| virtual int Init() { return mindspore::lite::RET_ERROR; } | |||
| OpParameter *op_parameter() { return op_parameter_; } | |||
| std::string name() const { return this->name_; } | |||
| virtual int Train() { | |||
| @@ -479,12 +479,6 @@ int LiteSession::Init(const Context *context) { | |||
| is_running_.store(false); | |||
| return ret; | |||
| } | |||
| ret = InitNPURuntime(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init NPU runtime failed."; | |||
| is_running_.store(false); | |||
| return ret; | |||
| } | |||
| executor_ = new (std::nothrow) Executor(); | |||
| if (nullptr == executor_) { | |||
| MS_LOG(ERROR) << "New Executor failed"; | |||
| @@ -661,18 +655,6 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs | |||
| return RET_OK; | |||
| } | |||
| int LiteSession::InitNPURuntime() { | |||
| #if SUPPORT_NPU | |||
| if (this->context_->IsNpuEnabled()) { | |||
| if (mindspore::lite::NPUManager::GetInstance()->InitClient() != RET_OK) { | |||
| MS_LOG(ERROR) << "NPU client init error."; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| #endif | |||
| return RET_OK; | |||
| } | |||
| int LiteSession::InitGPURuntime() { | |||
| #if SUPPORT_GPU | |||
| if (this->context_->IsGpuEnabled()) { | |||
| @@ -103,8 +103,6 @@ class LiteSession : public session::LiteSession { | |||
| private: | |||
| void ResetInputsShape(const std::vector<std::vector<int>> &dims); | |||
| int InitNPURuntime(); | |||
| int InitGPURuntime(); | |||
| protected: | |||
| @@ -17,10 +17,9 @@ | |||
| #include "src/runtime/agent/npu/npu_executor.h" | |||
| #include "include/errorcode.h" | |||
| #include "src/runtime/agent/npu/npu_manager.h" | |||
| #include "nnacl/pack.h" | |||
| namespace mindspore::lite { | |||
| int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { | |||
| this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(); | |||
| this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_); | |||
| if (this->client_ == nullptr) { | |||
| MS_LOG(ERROR) << "client is nullptr."; | |||
| return RET_ERROR; | |||
| @@ -33,9 +32,8 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { | |||
| } | |||
| int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw, | |||
| const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator, const KernelCallBack &before, | |||
| const KernelCallBack &after) { | |||
| const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator, | |||
| const KernelCallBack &before, const KernelCallBack &after) { | |||
| hiai::AiContext context; | |||
| for (int i = 0; i < npu_input_tensors_.size(); ++i) { | |||
| void *data = in_tensors[i]->data_c(); | |||
| @@ -43,12 +41,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector< | |||
| MS_LOG(ERROR) << model_name_ << " inputs data is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| if (inputs_nhwc2nchw[i]) { | |||
| PackNHWCToNCHWFp32(data, npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->Batch(), | |||
| in_tensors[i]->Width() * in_tensors[i]->Height(), in_tensors[i]->Channel()); | |||
| } else { | |||
| memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size()); | |||
| } | |||
| memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size()); | |||
| } | |||
| context.AddPara("model_name", model_name_); | |||
| if (this->client_ == nullptr) { | |||
| @@ -68,12 +61,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector< | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| if (outputs_nchw2nhwc[i]) { | |||
| PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(), | |||
| out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel()); | |||
| } else { | |||
| memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize()); | |||
| } | |||
| memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize()); | |||
| out_tensors[i]->ResetRefCount(); | |||
| } | |||
| return RET_OK; | |||
| @@ -32,8 +32,7 @@ class NPUExecutor : public Executor { | |||
| int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override; | |||
| int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw, | |||
| const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator = nullptr, | |||
| const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr, | |||
| const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr); | |||
| private: | |||
| @@ -0,0 +1,224 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/agent/npu/npu_fusion_pass.h" | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "nnacl/concat_parameter.h" | |||
| namespace mindspore::lite { | |||
| bool CheckFusion(kernel::LiteKernel *kernel) { | |||
| auto pre_flag = | |||
| std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *kernel) { | |||
| return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc && kernel->out_kernels().size() == 1; | |||
| }); | |||
| if (!pre_flag) { | |||
| return false; | |||
| } | |||
| auto post_flag = | |||
| std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) { | |||
| return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw && kernel->in_kernels().size() == 1; | |||
| }); | |||
| return post_flag; | |||
| } | |||
| void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) { | |||
| for (auto in_kernel : cur_kernel->in_kernels()) { | |||
| auto pre_kernel = in_kernel->in_kernels()[0]; | |||
| auto pre_out_kernels = pre_kernel->out_kernels(); | |||
| for (size_t i = 0; i < pre_out_kernels.size(); i++) { | |||
| if (pre_out_kernels[i] == in_kernel) { | |||
| pre_out_kernels[i] = cur_kernel; | |||
| break; | |||
| } | |||
| } | |||
| pre_kernel->set_out_kernels(pre_out_kernels); | |||
| auto cur_in_kernels = cur_kernel->in_kernels(); | |||
| for (size_t i = 0; i < cur_in_kernels.size(); i++) { | |||
| if (cur_in_kernels[i] == in_kernel) { | |||
| cur_in_kernels[i] = pre_kernel; | |||
| break; | |||
| } | |||
| } | |||
| cur_kernel->set_in_kernels(cur_in_kernels); | |||
| kernels->erase(find(kernels->begin(), kernels->end(), in_kernel)); | |||
| } | |||
| } | |||
| void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) { | |||
| for (auto out_kernel : cur_kernel->out_kernels()) { | |||
| auto post_kernel = out_kernel->out_kernels()[0]; | |||
| auto post_in_kernels = post_kernel->in_kernels(); | |||
| for (size_t i = 0; i < post_in_kernels.size(); i++) { | |||
| if (post_in_kernels[i] == out_kernel) { | |||
| post_in_kernels[i] = cur_kernel; | |||
| break; | |||
| } | |||
| } | |||
| post_kernel->set_in_kernels(post_in_kernels); | |||
| auto cur_out_kernels = cur_kernel->out_kernels(); | |||
| for (size_t i = 0; i < cur_out_kernels.size(); i++) { | |||
| if (cur_out_kernels[i] == out_kernel) { | |||
| cur_out_kernels[i] = post_kernel; | |||
| break; | |||
| } | |||
| } | |||
| cur_kernel->set_out_kernels(cur_out_kernels); | |||
| kernels->erase(find(kernels->begin(), kernels->end(), out_kernel)); | |||
| } | |||
| } | |||
| void UpdatePreTensors(kernel::LiteKernel *cur_kernel) { | |||
| auto tensors_vec = cur_kernel->in_tensors(); | |||
| for (auto in_kernel : cur_kernel->in_kernels()) { | |||
| lite::Tensor *cur_tensor = nullptr; | |||
| auto in_tensor = in_kernel->in_tensors()[0]; | |||
| auto out_tensor = in_kernel->out_tensors()[0]; | |||
| auto pre_kernel = in_kernel->in_kernels()[0]; | |||
| for (size_t i = 0; i < pre_kernel->out_tensors().size(); i++) { | |||
| if (pre_kernel->out_tensors()[i] == in_tensor) { | |||
| cur_tensor = pre_kernel->out_tensors()[i]; | |||
| } | |||
| } | |||
| for (size_t i = 0; i < tensors_vec.size(); i++) { | |||
| if (tensors_vec[i] == out_tensor) { | |||
| tensors_vec[i] = cur_tensor; | |||
| } | |||
| } | |||
| } | |||
| cur_kernel->set_in_tensors(tensors_vec); | |||
| } | |||
| void UpdatePostTensors(kernel::LiteKernel *cur_kernel) { | |||
| auto tensors_vec = cur_kernel->out_tensors(); | |||
| for (auto out_kernel : cur_kernel->out_kernels()) { | |||
| auto in_tensor = out_kernel->in_tensors()[0]; | |||
| auto out_tensor = out_kernel->out_tensors()[0]; | |||
| auto post_kernel = out_kernel->out_kernels()[0]; | |||
| lite::Tensor *cur_tensor = nullptr; | |||
| for (size_t i = 0; i < post_kernel->in_tensors().size(); i++) { | |||
| if (post_kernel->in_tensors()[i] == out_tensor) { | |||
| cur_tensor = post_kernel->in_tensors()[i]; | |||
| } | |||
| } | |||
| for (size_t i = 0; i < tensors_vec.size(); i++) { | |||
| if (tensors_vec[i] == in_tensor) { | |||
| tensors_vec[i] = cur_tensor; | |||
| } | |||
| } | |||
| } | |||
| cur_kernel->set_out_tensors(tensors_vec); | |||
| } | |||
// Maps an NHWC axis index to its NCHW equivalent: N->0, H->2, W->3, C->1.
// Both 3 and -1 denote the channel axis. Any other value yields the
// sentinel -2 (unsupported axis).
int TransFormAxis(int axis) {
  if (axis == 0) {
    return 0;
  }
  if (axis == 1) {
    return 2;
  }
  if (axis == 2) {
    return 3;
  }
  if (axis == 3 || axis == -1) {
    return 1;
  }
  return -2;
}
| int NPUFusionPass::AddFusion(kernel::LiteKernel *kernel) { | |||
| if (!CheckFusion(kernel)) { | |||
| return RET_OK; | |||
| } | |||
| UpdatePreTensors(kernel); | |||
| UpdatePostTensors(kernel); | |||
| UpdatePreKernels(kernel); | |||
| UpdatePostKernels(kernel); | |||
| return RET_OK; | |||
| } | |||
| int NPUFusionPass::ConcatFusion(kernel::LiteKernel *kernel) { | |||
| if (!CheckFusion(kernel)) { | |||
| return RET_OK; | |||
| } | |||
| UpdatePreTensors(kernel); | |||
| UpdatePostTensors(kernel); | |||
| UpdatePreKernels(kernel); | |||
| UpdatePostKernels(kernel); | |||
| auto concat_param = reinterpret_cast<ConcatParameter *>(kernel->op_parameter()); | |||
| concat_param->axis_ = TransFormAxis(concat_param->axis_); | |||
| return RET_OK; | |||
| } | |||
| int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) { | |||
| if (kernel->out_kernels().empty()) { | |||
| return RET_OK; | |||
| } | |||
| if (!std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) { | |||
| return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; | |||
| })) { | |||
| return RET_OK; | |||
| } | |||
| auto pre_kernel = kernel->in_kernels()[0]; | |||
| auto pre_out_kernels = pre_kernel->out_kernels(); | |||
| for (size_t i = 0; i < pre_out_kernels.size(); i++) { | |||
| if (pre_out_kernels[i] == kernel) { | |||
| pre_out_kernels.erase(pre_out_kernels.begin() + i); | |||
| break; | |||
| } | |||
| } | |||
| for (const auto &nc2nh : kernel->out_kernels()) { | |||
| for (const auto &post_kernel : nc2nh->out_kernels()) { | |||
| auto post_in_kernels = post_kernel->in_kernels(); | |||
| for (size_t i = 0; i < post_in_kernels.size(); i++) { | |||
| if (post_in_kernels[i] == nc2nh) { | |||
| post_in_kernels[i] = pre_kernel; | |||
| break; | |||
| } | |||
| } | |||
| post_kernel->set_in_kernels(post_in_kernels); | |||
| pre_out_kernels.push_back(post_kernel); | |||
| } | |||
| kernels->erase(find(kernels->begin(), kernels->end(), nc2nh)); | |||
| } | |||
| pre_kernel->set_out_kernels(pre_out_kernels); | |||
| kernels->erase(find(kernels->begin(), kernels->end(), kernel)); | |||
| return RET_OK; | |||
| } | |||
| int NPUFusionPass::Fusion() { | |||
| for (auto kernel : *kernels) { | |||
| switch (kernel->Type()) { | |||
| case schema::PrimitiveType_Concat: | |||
| ConcatFusion(kernel); | |||
| continue; | |||
| case schema::PrimitiveType_Add: | |||
| AddFusion(kernel); | |||
| continue; | |||
| case schema::PrimitiveType_Nchw2Nhwc: | |||
| FormatFusion(kernel); | |||
| continue; | |||
| default: | |||
| continue; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -0,0 +1,40 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "src/ops/primitive_c.h" | |||
| namespace mindspore::lite { | |||
// Graph pass that removes cancelling Nchw2Nhwc/Nhwc2Nchw transpose kernels
// around fusable ops (Concat, Add) and collapses back-to-back transposes.
// Operates in place on the kernel list handed to the constructor.
class NPUFusionPass {
 public:
  // Keeps a raw pointer to the caller-owned kernel list; the pass mutates it.
  explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; }
  ~NPUFusionPass() = default;
  // Applies every fusion rule over the kernel list; returns RET_OK.
  int Fusion();

 protected:
  // Fuses transposes around a Concat kernel and remaps its axis parameter.
  int ConcatFusion(kernel::LiteKernel *kernel);
  // Fuses transposes around an Add kernel.
  int AddFusion(kernel::LiteKernel *kernel);
  // Cancels a Nchw2Nhwc kernel whose consumers are all Nhwc2Nchw.
  int FormatFusion(kernel::LiteKernel *kernel);
  // Relink helpers: splice transpose kernels out of the in/out edges.
  void UpdatePreKernels(kernel::LiteKernel *kernel);
  void UpdatePostKernels(kernel::LiteKernel *kernel);

 private:
  // Not owned; points at the session's kernel list.
  std::vector<kernel::LiteKernel *> *kernels;
};
| } // namespace mindspore::lite | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_ | |||
| @@ -15,57 +15,65 @@ | |||
| */ | |||
| #include "src/runtime/agent/npu/npu_manager.h" | |||
| #include <sys/system_properties.h> | |||
| #include <sys/fcntl.h> | |||
| #include <unistd.h> | |||
| #include "include/hiai_ir_build.h" | |||
| #include "include/HiAiModelManagerService.h" | |||
| #include "include/errorcode.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "src/common/file_utils.h" | |||
| namespace mindspore::lite { | |||
| #define MAX_MODEL_NUM 20 | |||
| int NPUManager::CompareVersion(const string &version1, const string &version2) { | |||
| std::istringstream iss1(version1); | |||
| std::istringstream iss2(version2); | |||
| string string1; | |||
| string string2; | |||
| while (!iss1.eof() || !iss2.eof()) { | |||
| getline(iss1, string1, '.'); | |||
| getline(iss2, string2, '.'); | |||
| if (stoi(string1) > stoi(string2)) return 1; | |||
| if (stoi(string1) < stoi(string2)) return -1; | |||
| string1 = string2 = "0"; | |||
| } | |||
| return 0; | |||
| } | |||
| bool NPUManager::IsSupportNPU() { | |||
| if (!is_npu_check_executor) { | |||
| CheckSupportNPU(); | |||
| } | |||
| if (is_support_npu) { | |||
| MS_LOG(INFO) << "The current device support NPU."; | |||
| return true; | |||
| } else { | |||
| MS_LOG(INFO) << "The current device NOT SUPPORT NPU."; | |||
| return false; | |||
| bool NPUManager::CheckEMUIVersion() { | |||
| char emui[128] = {0x00}; | |||
| __system_property_get("ro.build.version.emui", emui); | |||
| std::string emui_str = emui; | |||
| int pos = emui_str.find('_'); | |||
| if (pos != std::string::npos) { | |||
| auto version = emui_str.substr(pos + 1); | |||
| int ret = CompareVersion(version, "11.0.0"); | |||
| if (ret < 0) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| std::string NPUManager::GetExecutorPath() { | |||
| std::string executor_path; | |||
| char cmdline[1024] = {0}; | |||
| int fd = open("/proc/self/cmdline", O_RDONLY); | |||
| if (fd >= 0) { | |||
| char ch; | |||
| int i = 0; | |||
| while (read(fd, &ch, sizeof(ch)) > 0 && !isspace(ch)) { | |||
| if (':' == ch) { | |||
| break; | |||
| } | |||
| cmdline[i] = ch; | |||
| i++; | |||
| bool NPUManager::CheckDDKVersion() { | |||
| auto client = std::make_shared<hiai::AiModelMngerClient>(); | |||
| if (client->GetVersion() != nullptr) { | |||
| std::string version = client->GetVersion(); | |||
| int ret = CompareVersion(version, "100.330.010.011"); | |||
| if (ret < 0) { | |||
| return false; | |||
| } | |||
| close(fd); | |||
| } | |||
| executor_path = std::string(cmdline); | |||
| if (executor_path.empty()) { | |||
| executor_path = "./"; | |||
| } | |||
| // android | |||
| if (executor_path.substr(0, 11) == "/data/data/") { | |||
| executor_path = executor_path + '/'; | |||
| return true; | |||
| } | |||
| bool NPUManager::IsSupportNPU() { | |||
| if (IsKirinChip() && CheckEMUIVersion() && CheckDDKVersion()) { | |||
| MS_LOG(INFO) << "The current device support NPU."; | |||
| return true; | |||
| } else { | |||
| // Linux | |||
| executor_path = executor_path.substr(0, executor_path.rfind('/')) + "/"; | |||
| MS_LOG(INFO) << "The current device NOT SUPPORT NPU."; | |||
| return false; | |||
| } | |||
| return executor_path; | |||
| } | |||
| bool NPUManager::IsKirinChip() { | |||
| @@ -96,86 +104,6 @@ bool NPUManager::IsKirinChip() { | |||
| return false; | |||
| } | |||
| bool WriteToOMFile(domi::ModelBufferData om_model_buff, const std::string &om_file_path) { | |||
| FILE *fp; | |||
| fp = fopen(om_file_path.c_str(), "wb"); | |||
| if (fp == nullptr) { | |||
| MS_LOG(ERROR) << om_file_path.c_str() << " open failed."; | |||
| return false; | |||
| } | |||
| auto write_size = (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp); | |||
| if (write_size != om_model_buff.length) { | |||
| fclose(fp); | |||
| MS_LOG(ERROR) << "Write om file failed."; | |||
| return false; | |||
| } | |||
| fclose(fp); | |||
| return true; | |||
| } | |||
| bool NPUManager::CheckOmBuildIr(const std::string &path) { | |||
| // build test om model | |||
| std::shared_ptr<hiai::op::Add> add_op(new (std::nothrow) hiai::op::Add("add")); | |||
| if (add_op == nullptr) { | |||
| MS_LOG(ERROR) << "new add_op failed."; | |||
| return false; | |||
| } | |||
| ge::TensorDesc desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT); | |||
| std::shared_ptr<hiai::op::Data> data = std::make_shared<hiai::op::Data>("data"); | |||
| data->update_input_desc_x(desc); | |||
| add_op->set_input_x1(*data); | |||
| add_op->set_input_x2(*data); | |||
| domi::HiaiIrBuild ir_build; | |||
| ge::Graph ir_graph("graph"); | |||
| std::vector<ge::Operator> inputs{*data, *data}; | |||
| std::vector<ge::Operator> outputs{*add_op}; | |||
| ir_graph.SetInputs(inputs).SetOutputs(outputs); | |||
| ge::Model om_model("test_model", "test_version"); | |||
| om_model.SetGraph(ir_graph); | |||
| domi::ModelBufferData om_model_buff; | |||
| if (!ir_build.CreateModelBuff(om_model, om_model_buff)) { | |||
| MS_LOG(ERROR) << "Create model buffer failed."; | |||
| return false; | |||
| } | |||
| if (!ir_build.BuildIRModel(om_model, om_model_buff)) { | |||
| MS_LOG(ERROR) << "Build IR model failed."; | |||
| return false; | |||
| } | |||
| // save test om model | |||
| remove(path.c_str()); | |||
| bool ret = WriteToOMFile(om_model_buff, path); | |||
| ir_build.ReleaseModelBuff(om_model_buff); | |||
| return ret; | |||
| } | |||
| void NPUManager::CheckSupportNPU() { | |||
| is_npu_check_executor = true; | |||
| std::string path_string = GetExecutorPath(); | |||
| std::string test_model_path = path_string + "/mindspore_lite_test_npu.om"; | |||
| std::ifstream ifs(test_model_path); | |||
| if (ifs.good() && ifs.is_open()) { | |||
| ifs.close(); | |||
| is_support_npu = true; | |||
| return; | |||
| } | |||
| if (!IsKirinChip()) { | |||
| MS_LOG(ERROR) << "The current device chip NOT SUPPORT NPU"; | |||
| is_support_npu = false; | |||
| return; | |||
| } | |||
| if (!CheckOmBuildIr(test_model_path)) { | |||
| MS_LOG(ERROR) << "Build OM IR error."; | |||
| is_support_npu = false; | |||
| return; | |||
| } | |||
| is_support_npu = true; | |||
| } | |||
| int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) { | |||
| hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size); | |||
| if (buffer == nullptr) { | |||
| @@ -188,33 +116,42 @@ int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &mode | |||
| model_desc_.push_back(desc); | |||
| mc_builder_->MemBufferDestroy(buffer); | |||
| model_map_.insert({model_name, index_}); | |||
| index_++; | |||
| return RET_OK; | |||
| } | |||
| int NPUManager::InitClient() { | |||
| this->client_ = std::make_shared<hiai::AiModelMngerClient>(); | |||
| if (this->client_ == nullptr) { | |||
| return RET_ERROR; | |||
| } | |||
| int ret = this->client_->Init(nullptr); | |||
| if (ret != hiai::AI_SUCCESS) { | |||
| return RET_ERROR; | |||
| } | |||
| mc_builder_ = std::make_shared<hiai::AiModelBuilder>(this->client_); | |||
| return RET_OK; | |||
| } | |||
| int NPUManager::LoadOMModel() { | |||
| int ret = this->client_->Load(model_desc_); | |||
| if (ret != hiai::AI_SUCCESS) { | |||
| MS_LOG(ERROR) << "Client load model failed." << ret; | |||
| return RET_ERROR; | |||
| for (int i = 0; i < index_ / MAX_MODEL_NUM + 1; i++) { | |||
| auto client = std::make_shared<hiai::AiModelMngerClient>(); | |||
| if (client == nullptr) { | |||
| MS_LOG(ERROR) << "NPU client is nullptr."; | |||
| return RET_ERROR; | |||
| } | |||
| int ret = client->Init(nullptr); | |||
| if (ret != hiai::AI_SUCCESS) { | |||
| MS_LOG(ERROR) << "NPU client init failed. code is " << ret; | |||
| return RET_ERROR; | |||
| } | |||
| mc_builder_ = std::make_shared<hiai::AiModelBuilder>(client); | |||
| vector<std::shared_ptr<hiai::AiModelDescription>> desc(model_desc_.begin() + i * MAX_MODEL_NUM, | |||
| ((i + 1) * MAX_MODEL_NUM > index_) | |||
| ? model_desc_.begin() + index_ | |||
| : model_desc_.begin() + (i + 1) * MAX_MODEL_NUM); | |||
| ret = client->Load(desc); | |||
| if (ret != hiai::AI_SUCCESS) { | |||
| MS_LOG(ERROR) << "Client load model failed." << ret; | |||
| return RET_ERROR; | |||
| } | |||
| clients_.push_back(client); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient() { return client_; } | |||
| std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) { | |||
| return clients_[model_map_[model_name] / MAX_MODEL_NUM]; | |||
| } | |||
| int NPUManager::index() { return index_; } | |||
| int NPUManager::index() const { return index_; } | |||
| } // namespace mindspore::lite | |||
| @@ -14,15 +14,21 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_ | |||
| #include <string> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <unordered_map> | |||
| #include <set> | |||
| #include "schema/model_generated.h" | |||
| #include "include/HiAiModelManagerService.h" | |||
| namespace mindspore::lite { | |||
| static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = { | |||
| schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, | |||
| schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D, | |||
| schema::PrimitiveType_Resize, schema::PrimitiveType_Pooling}; | |||
| class NPUManager { | |||
| public: | |||
| static NPUManager *GetInstance() { | |||
| @@ -32,8 +38,6 @@ class NPUManager { | |||
| bool IsSupportNPU(); | |||
| int InitClient(); | |||
| // provide to subgraph to add model. | |||
| int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency); | |||
| @@ -41,18 +45,18 @@ class NPUManager { | |||
| int LoadOMModel(); | |||
| // provide to executor. | |||
| std::shared_ptr<hiai::AiModelMngerClient> GetClient(); | |||
| std::shared_ptr<hiai::AiModelMngerClient> GetClient(const std::string &model_name); | |||
| int index(); | |||
| int index() const; | |||
| private: | |||
| void CheckSupportNPU(); | |||
| bool IsKirinChip(); | |||
| bool CheckOmBuildIr(const std::string &path); | |||
| bool CheckEMUIVersion(); | |||
| std::string GetExecutorPath(); | |||
| bool CheckDDKVersion(); | |||
| int CompareVersion(const std::string &version1, const std::string &version2); | |||
| private: | |||
| int index_ = 0; | |||
| @@ -61,12 +65,14 @@ class NPUManager { | |||
| bool is_support_npu = false; | |||
| std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr; | |||
| std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_; | |||
| std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_; | |||
| std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr; | |||
| std::unordered_map<std::string, int> model_map_; | |||
| }; | |||
| } // namespace mindspore::lite | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_ | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_ | |||
| @@ -0,0 +1,102 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/kernel_registry.h" | |||
| #include "src/ops/nhwc2nchw.h" | |||
| #include "src/ops/nchw2nhwc.h" | |||
| #include "src/runtime/agent/npu/npu_pass_utils.h" | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kCPU; | |||
| using kernel::KERNEL_ARCH::kNPU; | |||
| PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() { | |||
| flatbuffers::FlatBufferBuilder fbb(1024); | |||
| auto val_offset = schema::CreateNchw2Nhwc(fbb); | |||
| auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nchw2Nhwc, val_offset.o); | |||
| fbb.Finish(prim_offset); | |||
| auto buf = fbb.GetBufferPointer(); | |||
| if (buf == nullptr) { | |||
| MS_LOG(ERROR) << "GetBufferPointer return nullptr"; | |||
| fbb.Clear(); | |||
| return nullptr; | |||
| } | |||
| auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize())); | |||
| if (primitive_buf == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc primitive_buf_ failed."; | |||
| fbb.Clear(); | |||
| return nullptr; | |||
| } | |||
| memcpy(primitive_buf, buf, fbb.GetSize()); | |||
| auto *primitive = PrimitiveC::NewPrimitiveC<Nchw2Nhwc>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf)); | |||
| free(primitive_buf); | |||
| fbb.Clear(); | |||
| return primitive; | |||
| } | |||
| PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() { | |||
| flatbuffers::FlatBufferBuilder fbb(1024); | |||
| auto val_offset = schema::CreateNhwc2Nchw(fbb); | |||
| auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nhwc2Nchw, val_offset.o); | |||
| fbb.Finish(prim_offset); | |||
| auto buf = fbb.GetBufferPointer(); | |||
| if (buf == nullptr) { | |||
| MS_LOG(ERROR) << "GetBufferPointer return nullptr"; | |||
| fbb.Clear(); | |||
| return nullptr; | |||
| } | |||
| auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize())); | |||
| if (primitive_buf == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc primitive_buf_ failed."; | |||
| fbb.Clear(); | |||
| return nullptr; | |||
| } | |||
| memcpy(primitive_buf, buf, fbb.GetSize()); | |||
| auto *primitive = PrimitiveC::NewPrimitiveC<Nhwc2Nchw>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf)); | |||
| free(primitive_buf); | |||
| fbb.Clear(); | |||
| return primitive; | |||
| } | |||
| kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors, | |||
| const std::vector<Tensor *> &out_tensors, | |||
| const InnerContext *ctx, const std::string &name) { | |||
| kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nchw2Nhwc}; | |||
| auto nchw2nhwc_primitive = CreateNchw2NhwcPrimitive(); | |||
| auto *nchw2nhwc_kernel = | |||
| KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nchw2nhwc_primitive, ctx, key); | |||
| nchw2nhwc_kernel->set_name(name); | |||
| return nchw2nhwc_kernel; | |||
| } | |||
| kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors, | |||
| const std::vector<Tensor *> &out_tensors, | |||
| const InnerContext *ctx, const std::string &name) { | |||
| kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nhwc2Nchw}; | |||
| auto nhwc2nchw_primitive = CreateNhwc2NchwPrimitive(); | |||
| auto *nhwc2nchw_kernel = | |||
| KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nhwc2nchw_primitive, ctx, key); | |||
| nhwc2nchw_kernel->set_name(name); | |||
| return nhwc2nchw_kernel; | |||
| } | |||
| void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels, | |||
| const std::vector<kernel::LiteKernel *> &out_kernels, | |||
| const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors) { | |||
| kernel->set_in_tensors(in_tensors); | |||
| kernel->set_out_tensors(out_tensors); | |||
| kernel->set_in_kernels(in_kernels); | |||
| kernel->set_out_kernels(out_kernels); | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -0,0 +1,44 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/lite_kernel.h" | |||
| namespace mindspore::lite { | |||
// Stateless helpers used by the NPU graph passes to create format-conversion
// kernels and rewire graph links.
class NPUPassUtils {
 public:
  // Creates a CPU Nchw2Nhwc transpose kernel named `name` over the given
  // tensors. Caller takes ownership of the returned kernel.
  static kernel::LiteKernel *CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
                                                   const std::string &name);
  // Creates a CPU Nhwc2Nchw transpose kernel named `name` over the given
  // tensors. Caller takes ownership of the returned kernel.
  static kernel::LiteKernel *CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
                                                   const std::string &name);
  // Replaces the kernel's in/out kernel links and in/out tensor lists.
  static void UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
                           const std::vector<kernel::LiteKernel *> &out_kernels,
                           const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors);

 private:
  // Flatbuffer round-trips that materialize the transpose primitives.
  static PrimitiveC *CreateNchw2NhwcPrimitive();
  static PrimitiveC *CreateNhwc2NchwPrimitive();
};
| } // namespace mindspore::lite | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_ | |||
| @@ -0,0 +1,201 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#include "src/runtime/agent/npu/npu_transform_pass.h"
#include <new>
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kCPU; | |||
| using kernel::KERNEL_ARCH::kNPU; | |||
| int NPUTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *after_kernel) { | |||
| std::vector<kernel::LiteKernel *> out_kernels; | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (out_kernel == after_kernel) { | |||
| out_kernels.push_back(trans_kernel); | |||
| } else { | |||
| out_kernels.push_back(out_kernel); | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors()); | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *before_kernel) { | |||
| std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]}; | |||
| for (int i = 1; i < kernel->in_tensors().size(); i++) { | |||
| cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]); | |||
| } | |||
| std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel}; | |||
| for (int i = 0; i < kernel->in_kernels().size(); i++) { | |||
| auto in_kernel = kernel->in_kernels()[i]; | |||
| if (in_kernel != kernel) { | |||
| cur_in_kernels.push_back(in_kernel); | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors, | |||
| kernel->out_tensors()); | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| auto kernel = *it; | |||
| bool is_input_kernel = kernel->in_kernels().empty(); | |||
| if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU || | |||
| npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) { | |||
| kernel::LiteKernel *before_kernel = nullptr; | |||
| if (!is_input_kernel) { | |||
| before_kernel = kernel->in_kernels()[0]; | |||
| } | |||
| // Create pre transform kernel out tensors. | |||
| std::vector<int> shapes{kernel->in_tensors()[0]->shape()[0], kernel->in_tensors()[0]->shape()[3], | |||
| kernel->in_tensors()[0]->shape()[1], kernel->in_tensors()[0]->shape()[2]}; | |||
| auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), shapes, schema::Format_NCHW, Tensor::VAR); | |||
| std::vector<Tensor *> pre_trans_out_tensors = {tensor}; | |||
| all_tensors->push_back(pre_trans_out_tensors[0]); | |||
| // Replace the output tensor of the previous node | |||
| auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++); | |||
| auto *pre_trans_kernel = | |||
| NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name); | |||
| // Insert Nhwc2Nchw into the front of the current queue | |||
| all_kernels->push_back(pre_trans_kernel); | |||
| // Replace the output kernel of the previous node | |||
| std::vector<kernel::LiteKernel *> pre_trans_in_kernel; | |||
| if (is_input_kernel) { | |||
| pre_trans_in_kernel = {}; | |||
| } else { | |||
| pre_trans_in_kernel = {before_kernel}; | |||
| } | |||
| NPUPassUtils::UpdateKernel(pre_trans_kernel, pre_trans_in_kernel, {kernel}, {kernel->in_tensors()[0]}, | |||
| pre_trans_out_tensors); | |||
| if (before_kernel != nullptr) { | |||
| UpdateNH2NCTransNodePreKernel(before_kernel, pre_trans_kernel, kernel); | |||
| } | |||
| UpdateNH2NCTransNodeAfterKernel(kernel, pre_trans_kernel, before_kernel); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| auto kernel = *it; | |||
| // Single output multiple references | |||
| for (int i = 0; i < kernel->out_kernels().size(); i++) { | |||
| auto next_kernel = kernel->out_kernels().at(i); | |||
| if (next_kernel->desc().arch == kNPU && npu_trans_nodes.find(next_kernel->Type()) != npu_trans_nodes.end()) { | |||
| continue; | |||
| } | |||
| // Change format the output of the current kernel nhwc->nchw | |||
| auto shapes = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[1], | |||
| kernel->out_tensors()[0]->shape()[2], kernel->out_tensors()[0]->shape()[3]}; | |||
| auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), shapes, schema::Format_NHWC, Tensor::VAR); | |||
| std::vector<Tensor *> post_trans_out_tensors = {tensor}; | |||
| all_tensors->push_back(post_trans_out_tensors[0]); | |||
| // Use the output tensor of the current node as the input tensor of the post-conversion operator | |||
| auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++); | |||
| auto *post_trans_kernel = | |||
| NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors, context, name); | |||
| // Replace the input tensor of the next node | |||
| NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {next_kernel}, kernel->out_tensors(), | |||
| post_trans_out_tensors); | |||
| // Directly insert in the back, will not affect the topological sort | |||
| all_kernels->push_back(post_trans_kernel); | |||
| UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel); | |||
| UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel) { | |||
| std::vector<kernel::LiteKernel *> cur_out_kernels; | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (out_kernel == next_kernel) { | |||
| cur_out_kernels.push_back(trans_kernel); | |||
| } else { | |||
| cur_out_kernels.push_back(out_kernel); | |||
| } | |||
| } | |||
| auto kernel_out_tensor = kernel->out_tensors()[0]; | |||
| // Change format the output of the current kernel nhwc->nchw | |||
| std::vector<int> kernel_out_new_shapes = {kernel_out_tensor->shape()[0], kernel_out_tensor->shape()[3], | |||
| kernel_out_tensor->shape()[1], kernel_out_tensor->shape()[2]}; | |||
| kernel_out_tensor->set_format(schema::Format_NCHW); | |||
| kernel_out_tensor->set_shape(kernel_out_new_shapes); | |||
| NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor}); | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel) { | |||
| std::vector<Tensor *> next_in_tensors; | |||
| for (auto next_in_tensor : next_kernel->in_tensors()) { | |||
| if (next_in_tensor != kernel->out_tensors()[0]) { | |||
| next_in_tensors.push_back(next_in_tensor); | |||
| } else { | |||
| next_in_tensors.push_back(trans_kernel->out_tensors()[0]); | |||
| } | |||
| } | |||
| next_kernel->set_in_tensors(next_in_tensors); | |||
| std::vector<kernel::LiteKernel *> next_in_kernels; | |||
| for (auto in_kernel : next_kernel->in_kernels()) { | |||
| if (in_kernel == kernel) { | |||
| next_in_kernels.push_back(trans_kernel); | |||
| } else { | |||
| next_in_kernels.push_back(in_kernel); | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors, | |||
| next_kernel->out_tensors()); | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| if (context->IsNpuEnabled()) { | |||
| std::vector<kernel::LiteKernel *> new_kernels; | |||
| for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) { | |||
| auto kernel = *it; | |||
| if (kernel->desc().arch != kNPU) { | |||
| new_kernels.push_back(kernel); | |||
| continue; | |||
| } | |||
| if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) { | |||
| InsertPreNode(context, it, &new_kernels, all_tensors); | |||
| new_kernels.push_back(kernel); | |||
| InsertPostNode(context, it, &new_kernels, all_tensors); | |||
| } else { | |||
| new_kernels.push_back(kernel); | |||
| } | |||
| } | |||
| all_kernels->clear(); | |||
| for (int i = 0; i < new_kernels.size(); i++) { | |||
| all_kernels->push_back(new_kernels[i]); | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -0,0 +1,51 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "src/ops/primitive_c.h" | |||
| namespace mindspore::lite { | |||
// Graph pass that makes CPU(NHWC) and NPU(NCHW) kernels interoperable: for each
// NPU kernel whose type is in npu_trans_nodes it inserts Nhwc2Nchw / Nchw2Nhwc
// transpose kernels on the surrounding edges and rewires kernel/tensor links.
class NPUTransformPass {
 public:
  // Runs the pass over all_kernels; new intermediate tensors go into all_tensors.
  // Returns RET_OK on success.
  int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                          std::vector<Tensor *> *all_tensors);

 private:
  // Edge-rewiring helpers for an inserted Nhwc2Nchw (NH2NC) transpose kernel.
  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                    kernel::LiteKernel *after_kernel);
  int UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                      kernel::LiteKernel *before_kernel);
  // Edge-rewiring helpers for an inserted Nchw2Nhwc (NC2NH) transpose kernel.
  int UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                    kernel::LiteKernel *after_kernel);
  int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                      kernel::LiteKernel *next_kernel);
  // Insert a Nhwc2Nchw kernel before / Nchw2Nhwc kernel(s) after the kernel at `it`.
  int InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                    std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);
  int InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                     std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);

 private:
  // Monotonic counter used to give inserted transpose kernels unique names.
  int total = 0;
};
| } // namespace mindspore::lite | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_ | |||
| @@ -24,7 +24,6 @@ | |||
| #include "include/graph/model.h" | |||
| #include "include/hiai_ir_build.h" | |||
| #include "include/HiAiModelManagerType.h" | |||
| #include "include/context.h" | |||
| #include "include/version.h" | |||
| #include "src/common/utils.h" | |||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | |||
| @@ -34,10 +33,6 @@ namespace mindspore::kernel { | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| std::set<schema::PrimitiveType> trans_nodes = {schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, | |||
| schema::PrimitiveType_DepthwiseConv2D, | |||
| schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_Resize}; | |||
| domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() { | |||
| ge::Graph graph("NPUGraph"); | |||
| @@ -75,8 +70,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() { | |||
| } | |||
| int SubGraphNpuKernel::Run() { | |||
| return reinterpret_cast<lite::NPUExecutor *>(this->executor_) | |||
| ->Run(in_tensors_, out_tensors_, nodes_, inputs_nhwc2nchw_, outputs_nchw2nhwc_); | |||
| return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, nodes_); | |||
| } | |||
| int SubGraphNpuKernel::BuildNPUInputOp() { | |||
| @@ -88,21 +82,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||
| if (IsSubGraphInputTensor(in_tensor)) { | |||
| auto tensor_name = node->name() + "_" + std::to_string(count++); | |||
| hiai::op::Data *data; | |||
| if (trans_nodes.find(node->Type()) != trans_nodes.end()) { | |||
| auto shape = in_tensor->shape(); | |||
| data = new (std::nothrow) hiai::op::Data(tensor_name); | |||
| if (data == nullptr) { | |||
| MS_LOG(ERROR) << "New data failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({shape[0], shape[3], shape[1], shape[2]}), | |||
| ge::FORMAT_NCHW, lite::ConverterToNPUDataType(in_tensor->data_type())); | |||
| data->update_input_desc_x(tensor_desc); | |||
| inputs_nhwc2nchw_.push_back(true); | |||
| } else { | |||
| data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name); | |||
| inputs_nhwc2nchw_.push_back(false); | |||
| } | |||
| data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name); | |||
| subgraph_input_op_.push_back(*data); | |||
| node_input_op.push_back(data); | |||
| continue; | |||
| @@ -132,7 +112,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||
| // weight tensor | |||
| if (is_weight_tensor) { | |||
| if (trans_nodes.find(node->Type()) == trans_nodes.end()) { | |||
| if (lite::npu_trans_nodes.find(node->Type()) == lite::npu_trans_nodes.end()) { | |||
| auto name = node->name() + "_" + std::to_string(count++); | |||
| auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++)); | |||
| if (weight_const == nullptr) { | |||
| @@ -162,11 +142,6 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::Li | |||
| ops.reserve(nodes.size()); | |||
| for (int i = 0; i < nodes.size(); i++) { | |||
| ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp()); | |||
| if (trans_nodes.find(schema::PrimitiveType(nodes[i]->GetPrimitive()->Type())) != trans_nodes.end()) { | |||
| outputs_nchw2nhwc_.push_back(true); | |||
| } else { | |||
| outputs_nchw2nhwc_.push_back(false); | |||
| } | |||
| } | |||
| return ops; | |||
| } | |||
| @@ -69,10 +69,6 @@ class SubGraphNpuKernel : public SubGraphKernel { | |||
| std::string GetOMModelName(); | |||
| private: | |||
| std::vector<bool> inputs_nhwc2nchw_; | |||
| std::vector<bool> outputs_nchw2nhwc_; | |||
| domi::ModelBufferData *model_buffer_data_; | |||
| std::vector<ge::Operator> subgraph_input_op_; | |||
| @@ -16,6 +16,7 @@ | |||
| #include "src/runtime/kernel/npu/convolution_base_npu.h" | |||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | |||
| #include "nnacl/pack.h" | |||
| namespace mindspore::kernel { | |||
| ConvolutionBaseNPUKernel::~ConvolutionBaseNPUKernel() { | |||
| @@ -39,14 +40,27 @@ int ConvolutionBaseNPUKernel::InitWeightBiasConst(const std::vector<lite::Tensor | |||
| MS_LOG(ERROR) << "New weight const failed."; | |||
| return RET_ERROR; | |||
| } | |||
| auto weight_shape = inputs[1]->shape(); | |||
| inputs[1]->set_shape({weight_shape[0], weight_shape[3], weight_shape[1], weight_shape[2]}); | |||
| inputs[1]->set_format(schema::Format_NCHW); | |||
| auto weight_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]); | |||
| weight_->set_attr_value(weight_tensor); | |||
| auto w_shape = inputs[1]->shape(); | |||
| auto nhwc_data = inputs[1]->data_c(); | |||
| auto nchw_data = reinterpret_cast<float *>(malloc(inputs[1]->ElementsNum() * sizeof(float))); | |||
| if (nchw_data == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| PackNHWCToNCHWFp32(nhwc_data, nchw_data, w_shape[0], w_shape[1] * w_shape[2], w_shape[3]); | |||
| inputs[1]->set_shape(weight_shape); | |||
| inputs[1]->set_format(schema::Format_NHWC); | |||
| std::shared_ptr<ge::Tensor> weight_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor()); | |||
| if (weight_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "new weight_tensor failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({w_shape[0], w_shape[3], w_shape[1], w_shape[2]}), | |||
| ge::FORMAT_NCHW, lite::ConverterToNPUDataType(inputs[1]->data_type())); | |||
| weight_tensor->SetTensorDesc(tensor_desc); | |||
| weight_tensor->SetData(reinterpret_cast<const uint8_t *>(nchw_data), inputs[1]->Size()); | |||
| weight_->set_attr_value(weight_tensor); | |||
| free(nchw_data); | |||
| if (inputs.size() >= 3) { | |||
| bias_ = new (std::nothrow) hiai::op::Const(name_ + "_b"); | |||
| @@ -17,17 +17,18 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CONVOLUTION_BASE_NPU_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "src/runtime/kernel/npu/transpose_base_npu.h" | |||
| #include "nnacl/conv_parameter.h" | |||
| namespace mindspore::kernel { | |||
| class ConvolutionBaseNPUKernel : public TransposeBaseNPUKernel { | |||
| class ConvolutionBaseNPUKernel : public NPUKernel { | |||
| public: | |||
| ConvolutionBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionBaseNPUKernel() override; | |||
| protected: | |||
| @@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| int ConvolutionDepthwiseNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) { | |||
| return RET_ERROR; | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionDepthwiseNPUKernel::SetConvDwParam() { | |||
| @@ -49,19 +49,13 @@ int ConvolutionDepthwiseNPUKernel::SetConvDwParam() { | |||
| int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| auto ret = SetPreTranspose(npu_inputs[0]); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // set conv attr param | |||
| conv_dw_ = new (std::nothrow) hiai::op::ConvolutionDepthwise(name_ + "_conv_depthwise"); | |||
| if (conv_dw_ == nullptr) { | |||
| MS_LOG(ERROR) << "New convolution depthwise operator for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = SetConvDwParam(); | |||
| auto ret = SetConvDwParam(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set npu op parameter for convolution depthwise op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| @@ -76,7 +70,7 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor * | |||
| if (inputs.size() == 3) { | |||
| conv_dw_->set_input_bias(*bias_); | |||
| } | |||
| conv_dw_->set_input_x(*pre_trans_); | |||
| conv_dw_->set_input_x(*npu_inputs[0]); | |||
| if (conv_param_->act_type_ != ActType_No) { | |||
| ret = SetActivation(conv_dw_, conv_param_->act_type_); | |||
| @@ -85,21 +79,17 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor * | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() { | |||
| if (conv_param_->act_type_ == ActType_No) { | |||
| ret = SetPostTranspose(conv_dw_); | |||
| return conv_dw_; | |||
| } else { | |||
| ret = SetPostTranspose(act_); | |||
| } | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| return act_; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() { return post_trans_; } | |||
| ConvolutionDepthwiseNPUKernel::~ConvolutionDepthwiseNPUKernel() { | |||
| if (conv_dw_ != nullptr) { | |||
| delete conv_dw_; | |||
| @@ -24,7 +24,7 @@ using mindspore::schema::PrimitiveType_Conv2D; | |||
| namespace mindspore::kernel { | |||
| int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) { | |||
| return RET_ERROR; | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionNPUKernel::SetConvParam() { | |||
| @@ -49,19 +49,13 @@ int ConvolutionNPUKernel::SetConvParam() { | |||
| int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| auto ret = SetPreTranspose(npu_inputs[0]); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // set conv attr param | |||
| conv_ = new (std::nothrow) hiai::op::Convolution(name_ + "_conv"); | |||
| if (conv_ == nullptr) { | |||
| MS_LOG(ERROR) << "New convolution operator for convolution op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = SetConvParam(); | |||
| auto ret = SetConvParam(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| @@ -76,7 +70,7 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs | |||
| if (inputs.size() == 3) { | |||
| conv_->set_input_bias(*bias_); | |||
| } | |||
| conv_->set_input_x(*pre_trans_); | |||
| conv_->set_input_x(*npu_inputs[0]); | |||
| if (conv_param_->act_type_ != ActType_No) { | |||
| ret = SetActivation(conv_, conv_param_->act_type_); | |||
| @@ -85,21 +79,17 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() { | |||
| if (conv_param_->act_type_ == ActType_No) { | |||
| ret = SetPostTranspose(conv_); | |||
| return conv_; | |||
| } else { | |||
| ret = SetPostTranspose(act_); | |||
| } | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| return act_; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() { return post_trans_; } | |||
| ConvolutionNPUKernel::~ConvolutionNPUKernel() { | |||
| if (conv_ != nullptr) { | |||
| delete conv_; | |||
| @@ -62,23 +62,17 @@ int PoolingNPUKernel::SetPoolingParam() { | |||
| int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| auto ret = SetPreTranspose(npu_inputs[0]); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| pooling_ = new (std::nothrow) hiai::op::PoolingD(name_ + "_pooling"); | |||
| if (pooling_ == nullptr) { | |||
| MS_LOG(ERROR) << "New pooling npu operator for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = SetPoolingParam(); | |||
| auto ret = SetPoolingParam(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| pooling_->set_input_x(*pre_trans_); | |||
| pooling_->set_input_x(*npu_inputs[0]); | |||
| if (pooling_param_->act_type_ != ActType_No) { | |||
| ret = SetActivation(pooling_, pooling_param_->act_type_); | |||
| @@ -87,21 +81,17 @@ int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() { | |||
| if (pooling_param_->act_type_ == ActType_No) { | |||
| ret = SetPostTranspose(pooling_); | |||
| return pooling_; | |||
| } else { | |||
| ret = SetPostTranspose(act_); | |||
| return act_; | |||
| } | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() { return post_trans_; } | |||
| PoolingNPUKernel::~PoolingNPUKernel() { | |||
| if (pooling_ != nullptr) { | |||
| delete pooling_; | |||
| @@ -36,12 +36,6 @@ int ResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const | |||
| int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| auto ret = SetPreTranspose(npu_inputs[0]); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32); | |||
| ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc); | |||
| vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)}; | |||
| @@ -55,7 +49,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con | |||
| return RET_ERROR; | |||
| } | |||
| op->set_attr_align_corners(align_corners_); | |||
| op->set_input_x(*pre_trans_); | |||
| op->set_input_x(*npu_inputs[0]); | |||
| op->set_input_size(*out_size); | |||
| op->set_attr_half_pixel_centers(preserve_aspect_ratio_); | |||
| op_ = op; | |||
| @@ -66,21 +60,14 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con | |||
| return RET_ERROR; | |||
| } | |||
| op->set_attr_align_corners(align_corners_); | |||
| op->set_input_x(*pre_trans_); | |||
| op->set_input_x(*npu_inputs[0]); | |||
| op->set_input_size(*out_size); | |||
| op_ = op; | |||
| } | |||
| ret = SetPostTranspose(op_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->post_trans_; } | |||
| ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->op_; } | |||
| ResizeNPUKernel::~ResizeNPUKernel() { | |||
| if (op_ != nullptr) { | |||
| @@ -24,12 +24,12 @@ | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "src/runtime/kernel/npu/transpose_base_npu.h" | |||
| namespace mindspore::kernel { | |||
| class ResizeNPUKernel : public TransposeBaseNPUKernel { | |||
| class ResizeNPUKernel : public NPUKernel { | |||
| public: | |||
| ResizeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter); | |||
| method_ = resize_parameter->method_; | |||
| new_height_ = resize_parameter->new_height_; | |||
| @@ -33,6 +33,8 @@ | |||
| #if SUPPORT_NPU | |||
| #include "src/runtime/agent/npu/subgraph_npu_kernel.h" | |||
| #include "src/runtime/agent/npu/npu_manager.h" | |||
| #include "src/runtime/agent/npu/npu_transform_pass.h" | |||
| #include "src/runtime/agent/npu/npu_fusion_pass.h" | |||
| #endif | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kCPU; | |||
| @@ -63,6 +65,11 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) { | |||
| return ret; | |||
| } | |||
| FindAllInoutKernels(*dst_kernels); | |||
| ret = RunPass(dst_kernels); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Schedule run pass failed."; | |||
| return ret; | |||
| } | |||
| ret = ConstructSubGraphs(dst_kernels); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConstructSubGraphs failed."; | |||
| @@ -514,4 +521,25 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker | |||
| kernel->FindInoutKernels(kernels); | |||
| } | |||
| } | |||
| int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) { | |||
| int ret = RET_OK; | |||
| #if SUPPORT_NPU | |||
| auto transform_pass = new NPUTransformPass; | |||
| ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_); | |||
| delete transform_pass; | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Run npu format transform pass failed."; | |||
| return ret; | |||
| } | |||
| auto fusion_pass = new NPUFusionPass(dst_kernels); | |||
| ret = fusion_pass->Fusion(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Run npu fussion transform pass failed."; | |||
| return ret; | |||
| } | |||
| delete fusion_pass; | |||
| #endif | |||
| return ret; | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -77,6 +77,8 @@ class Scheduler { | |||
| static kernel::SubGraphType GetKernelSubGraphType(const kernel::LiteKernel *kernel); | |||
| int RunPass(std::vector<kernel::LiteKernel *> *dst_kernels); | |||
| protected: | |||
| const InnerContext *context_ = nullptr; | |||
| Model *src_model_ = nullptr; | |||