@@ -49,6 +49,7 @@ bool NPUManager::CheckEMUIVersion() {
     auto version = emui_str.substr(pos + 1);
     int ret = CompareVersion(version, "10.0.0");
     if (ret < 0) {
+      MS_LOG(WARNING) << "EMUI version " << version << " less than 10.0.0";
       return false;
     }
   }
@@ -80,8 +81,9 @@ bool NPUManager::CheckDDKVersion() {
   auto client = std::make_shared<hiai::AiModelMngerClient>();
   if (client->GetVersion() != nullptr) {
     std::string version = client->GetVersion();
-    int ret = CompareVersion(version, "100.330.010.011");
+    int ret = CompareVersion(version, "100.320.010.023");
     if (ret < 0) {
+      MS_LOG(WARNING) << "DDK Version " << version << " less than 100.320.010.023";
       return false;
     }
   }
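Both checks above funnel through CompareVersion, which by its call sites compares dotted version strings and returns a negative value when the first argument is lower; note this hunk also relaxes the minimum DDK version from 100.330.010.011 to 100.320.010.023. A minimal sketch of a segment-wise comparison with those semantics (the helper name and tie-breaking are assumptions, not the actual npu_manager.cc implementation):

    #include <sstream>
    #include <string>
    #include <vector>

    // Sketch only: negative result means v1 < v2, matching the call sites.
    int CompareVersionSketch(const std::string &v1, const std::string &v2) {
      auto split = [](const std::string &v) {
        std::vector<int> segs;
        std::stringstream ss(v);
        std::string seg;
        while (std::getline(ss, seg, '.')) segs.push_back(std::stoi(seg));
        return segs;
      };
      auto a = split(v1);
      auto b = split(v2);
      for (size_t i = 0; i < a.size() && i < b.size(); ++i) {
        if (a[i] != b[i]) return a[i] < b[i] ? -1 : 1;  // first differing segment decides
      }
      return static_cast<int>(a.size()) - static_cast<int>(b.size());
    }

Under these semantics CompareVersionSketch("100.320.010.023", "100.330.010.011") < 0, so every device that passed the old check still passes the new, lower threshold.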
@@ -96,7 +98,7 @@ bool NPUManager::IsSupportNPU() {
       MS_LOG(INFO) << "The current device support NPU.";
     } else {
       is_support_ = false;
-      MS_LOG(INFO) << "The current device NOT SUPPORT NPU.";
+      MS_LOG(WARNING) << "The current device NOT SUPPORT NPU.";
     }
     return is_support_;
   } else {
@@ -130,6 +132,7 @@ bool NPUManager::IsKirinChip() {
       cpu_info.close();
       return true;
     } else {
+      MS_LOG(WARNING) << "Unsupported KirinChip " << kirin_number;
      cpu_info.close();
      return false;
    }
@@ -15,6 +15,7 @@
  */
 #include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h"
 #include <vector>
+#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
 #include "src/lite_kernel.h"
 #include "nnacl/concat_parameter.h"
@@ -22,14 +23,16 @@ namespace mindspore::lite {
 bool CheckFusion(kernel::LiteKernel *kernel) {
   auto pre_flag =
     std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *in_kernel) {
-      return in_kernel->Type() == schema::PrimitiveType_Nchw2Nhwc && in_kernel->out_kernels().size() == 1;
+      return NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(in_kernel)) &&
+             in_kernel->out_kernels().size() == 1;
     });
   if (!pre_flag) {
     return false;
   }
-  auto post_flag = std::all_of(
-    kernel->out_kernels().begin(), kernel->out_kernels().end(),
-    [](const kernel::LiteKernel *out_kernel) { return out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; });
+  auto post_flag =
+    std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *out_kernel) {
+      return NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(out_kernel));
+    });
   return post_flag;
 }
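The rewritten CheckFusion accepts a kernel only when every producer is a single-consumer NCHW-to-NHWC transpose and every consumer is an NHWC-to-NCHW transpose, i.e. the kernel sits in a Nchw2Nhwc -> op -> Nhwc2Nchw sandwich that the pass can later collapse. A self-contained mock of the same shape check (Node and its flags are illustrative stand-ins for LiteKernel and the new NPUPassUtils predicates, not real API):

    #include <algorithm>
    #include <vector>

    // Stand-in for kernel::LiteKernel; the two flags play the role of
    // NPUPassUtils::IsNchw2Nhwc / IsNhwc2Nchw.
    struct Node {
      bool is_nchw2nhwc = false;
      bool is_nhwc2nchw = false;
      std::vector<Node *> in, out;
    };

    // Mirrors CheckFusion: fusible only inside a Nchw2Nhwc -> op -> Nhwc2Nchw sandwich.
    bool CheckFusionMock(const Node &n) {
      bool pre = std::all_of(n.in.begin(), n.in.end(),
                             [](const Node *k) { return k->is_nchw2nhwc && k->out.size() == 1; });
      if (!pre) return false;
      return std::all_of(n.out.begin(), n.out.end(),
                         [](const Node *k) { return k->is_nhwc2nchw; });
    }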
@@ -37,15 +40,17 @@ bool CheckFormatFusion(kernel::LiteKernel *kernel) {
   if (kernel->out_kernels().empty()) {
     return false;
   }
-  if (kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) {
-    return std::all_of(
-      kernel->out_kernels().begin(), kernel->out_kernels().end(),
-      [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc; });
+  if (NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel))) {
+    return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
+                       [](const kernel::LiteKernel *kernel) {
+                         return NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel));
+                       });
   }
-  if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) {
-    return std::all_of(
-      kernel->out_kernels().begin(), kernel->out_kernels().end(),
-      [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; });
+  if (NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel))) {
+    return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
+                       [](const kernel::LiteKernel *kernel) {
+                         return NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel));
+                       });
   }
   return false;
 }
@@ -60,6 +65,10 @@ void NPUFusionPass::RemoveAndFreeKernel(kernel::LiteKernel *cur_kernel) {
 void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) {
   for (auto in_kernel : cur_kernel->in_kernels()) {
+    // graph in kernel
+    if (in_kernel->in_kernels().empty()) {
+      continue;
+    }
     auto pre_kernel = in_kernel->in_kernels()[0];
     auto pre_out_kernels = pre_kernel->out_kernels();
@@ -85,6 +94,10 @@ void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) {
 void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) {
   for (auto out_kernel : cur_kernel->out_kernels()) {
+    // graph out kernel
+    if (out_kernel->out_kernels().empty()) {
+      continue;
+    }
     auto post_kernel = out_kernel->out_kernels()[0];
     auto post_in_kernels = post_kernel->in_kernels();
@@ -183,22 +196,13 @@ int NPUFusionPass::ConcatFusion(kernel::LiteKernel *kernel) {
 int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
   auto pre_kernel = kernel->in_kernels()[0];
   auto in_tensor = kernel->in_tensors()[0];
-  auto out_tensor = kernel->out_tensors()[0];
-  auto tensor_itr = std::find(pre_kernel->out_tensors().begin(), pre_kernel->out_tensors().end(), in_tensor);
-  if (tensor_itr != pre_kernel->out_tensors().end()) {
-    in_tensor = *tensor_itr;
-  } else {
-    MS_LOG(ERROR) << "Can't find the connneted tensor between kernel " << kernel->name() << " and it's pre_kernel.";
-    return RET_ERROR;
-  }
   std::vector<kernel::LiteKernel *> pre_insert_kernels;
   for (const auto &trans_kernel : kernel->out_kernels()) {
     for (const auto &post_kernel : trans_kernel->out_kernels()) {
       // update tensor
       auto tensors_vec = post_kernel->in_tensors();
       for (size_t i = 0; i < tensors_vec.size(); i++) {
-        if (tensors_vec[i] == out_tensor) {
+        if (tensors_vec[i] == trans_kernel->out_tensors()[0]) {
           tensors_vec[i] = in_tensor;
           break;
         }
@@ -218,10 +222,7 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
       RemoveAndFreeKernel(trans_kernel);
     }
   }
-  auto pre_out_kernels = pre_kernel->out_kernels();
-  auto itr = find(pre_out_kernels.begin(), pre_out_kernels.end(), kernel);
-  pre_out_kernels.insert(itr, pre_insert_kernels.begin(), pre_insert_kernels.end());
-  pre_kernel->set_in_kernels(pre_out_kernels);
+  pre_kernel->set_out_kernels(pre_insert_kernels);
   RemoveAndFreeKernel(kernel);
   return RET_OK;
 }
@@ -229,7 +230,8 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
 int NPUFusionPass::Run() {
   for (size_t i = 0; i < kernels->size(); i++) {
     auto kernel = (*kernels)[i];
-    if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc || kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) {
+    if (NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel)) ||
+        NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel))) {
       if (CheckFormatFusion(kernel)) {
         i--;
         FormatFusion(kernel);
@@ -30,11 +30,13 @@ int GetInsertState(kernel::LiteKernel *kernel) {
     return InsertNone;
   }
   auto pre_flag =
-    std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(),
-                [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc; });
+    std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *kernel) {
+      return NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel));
+    });
   auto post_flag =
-    std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
-                [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; });
+    std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
+      return NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel));
+    });
   if (pre_flag && !post_flag) {
     return PostInsert;
   }
@@ -48,7 +50,7 @@ int NPUInsertTransformPass::InsertPreNode(const InnerContext *context, kernel::L
                                           std::vector<kernel::LiteKernel *> *trans_kernels,
                                           std::vector<Tensor *> *all_tensors) {
   for (auto in_kernel : kernel->in_kernels()) {
-    if (in_kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) {
+    if (NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(in_kernel))) {
       continue;
     }
     auto nhwc_shape = in_kernel->out_tensors()[0]->shape();
@@ -86,7 +88,7 @@ int NPUInsertTransformPass::InsertPostNode(const InnerContext *context, kernel::
                                            std::vector<kernel::LiteKernel *> *trans_kernels,
                                            std::vector<Tensor *> *all_tensors) {
   for (auto out_kernel : kernel->out_kernels()) {
-    if (out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) {
+    if (NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(out_kernel))) {
       continue;
     }
     auto nhwc_shape = kernel->out_tensors()[0]->shape();
@@ -14,17 +14,19 @@
  * limitations under the License.
  */
-#include "src/kernel_registry.h"
-#include "src/ops/nhwc2nchw.h"
-#include "src/ops/nchw2nhwc.h"
 #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
+#include "src/ops/transpose.h"
+#include "nnacl/transpose.h"
+#include "src/ops/populate/populate_register.h"
+#include "src/runtime/kernel/arm/fp32/transpose_fp32.h"
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kCPU;
 using kernel::KERNEL_ARCH::kNPU;
-PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
+PrimitiveC *NPUPassUtils::CreateTransposePrimitive() {
   flatbuffers::FlatBufferBuilder fbb(1024);
   auto val_offset = schema::CreateNchw2Nhwc(fbb);
-  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nchw2Nhwc, val_offset.o);
+  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Transpose, val_offset.o);
   fbb.Finish(prim_offset);
   auto buf = fbb.GetBufferPointer();
   if (buf == nullptr) {
@@ -39,56 +41,72 @@ PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
     return nullptr;
   }
   memcpy(primitive_buf, buf, fbb.GetSize());
-  auto *primitive = PrimitiveC::NewPrimitiveC<Nchw2Nhwc>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
+  auto *primitive = PrimitiveC::NewPrimitiveC<Transpose>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
   free(primitive_buf);
   fbb.Clear();
   return primitive;
 }
 
-PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
-  flatbuffers::FlatBufferBuilder fbb(1024);
-  auto val_offset = schema::CreateNhwc2Nchw(fbb);
-  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nhwc2Nchw, val_offset.o);
-  fbb.Finish(prim_offset);
-  auto buf = fbb.GetBufferPointer();
-  if (buf == nullptr) {
-    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
-    fbb.Clear();
+kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
+                                                        const std::vector<Tensor *> &out_tensors,
+                                                        const InnerContext *ctx, const std::string &name) {
+  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Transpose};
+  auto nchw2nhwc_primitive = CreateTransposePrimitive();
+  auto *transpose_param = reinterpret_cast<TransposeParameter *>(malloc(sizeof(TransposeParameter)));
+  if (transpose_param == nullptr) {
+    MS_LOG(ERROR) << "malloc TransposeParameter failed.";
     return nullptr;
   }
-  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
-  if (primitive_buf == nullptr) {
-    MS_LOG(ERROR) << "Malloc primitive buffer failed.";
-    fbb.Clear();
+  memset(transpose_param, 0, sizeof(TransposeParameter));
+  transpose_param->op_parameter_.type_ = nchw2nhwc_primitive->Type();
+  transpose_param->perm_[0] = 0;
+  transpose_param->perm_[1] = 2;
+  transpose_param->perm_[2] = 3;
+  transpose_param->perm_[3] = 1;
+  transpose_param->num_axes_ = 4;
+  auto kernel = new (std::nothrow) kernel::TransposeCPUKernel(reinterpret_cast<OpParameter *>(transpose_param),
+                                                              in_tensors, out_tensors, ctx, nchw2nhwc_primitive);
+  if (kernel != nullptr) {
+    kernel->set_desc(key);
+  } else {
+    MS_LOG(ERROR) << "New Nchw2Nhwc Kernel failed.";
     return nullptr;
   }
-  memcpy(primitive_buf, buf, fbb.GetSize());
-  auto *primitive = PrimitiveC::NewPrimitiveC<Nhwc2Nchw>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
-  free(primitive_buf);
-  fbb.Clear();
-  return primitive;
-}
-
-kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
-                                                        const std::vector<Tensor *> &out_tensors,
-                                                        const InnerContext *ctx, const std::string &name) {
-  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nchw2Nhwc};
-  auto nchw2nhwc_primitive = CreateNchw2NhwcPrimitive();
-  auto *nchw2nhwc_kernel =
-    KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nchw2nhwc_primitive, ctx, key);
-  nchw2nhwc_kernel->set_name(name);
-  return nchw2nhwc_kernel;
+  kernel->set_name(name);
+  return kernel;
 }
 
 kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
                                                         const std::vector<Tensor *> &out_tensors,
                                                         const InnerContext *ctx, const std::string &name) {
-  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nhwc2Nchw};
-  auto nhwc2nchw_primitive = CreateNhwc2NchwPrimitive();
-  auto *nhwc2nchw_kernel =
-    KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nhwc2nchw_primitive, ctx, key);
-  nhwc2nchw_kernel->set_name(name);
-  return nhwc2nchw_kernel;
+  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Transpose};
+  auto nhwc2nchw_primitive = CreateTransposePrimitive();
+  auto *transpose_param = reinterpret_cast<TransposeParameter *>(malloc(sizeof(TransposeParameter)));
+  if (transpose_param == nullptr) {
+    MS_LOG(ERROR) << "malloc TransposeParameter failed.";
+    return nullptr;
+  }
+  memset(transpose_param, 0, sizeof(TransposeParameter));
+  transpose_param->op_parameter_.type_ = nhwc2nchw_primitive->Type();
+  transpose_param->perm_[0] = 0;
+  transpose_param->perm_[1] = 3;
+  transpose_param->perm_[2] = 1;
+  transpose_param->perm_[3] = 2;
+  transpose_param->num_axes_ = 4;
+  auto kernel = new (std::nothrow) kernel::TransposeCPUKernel(reinterpret_cast<OpParameter *>(transpose_param),
+                                                              in_tensors, out_tensors, ctx, nhwc2nchw_primitive);
+  if (kernel != nullptr) {
+    kernel->set_desc(key);
+  } else {
+    MS_LOG(ERROR) << "New Nhwc2Nchw Kernel failed.";
+    return nullptr;
+  }
+  kernel->set_name(name);
+  return kernel;
 }
 
 void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
@@ -173,4 +191,39 @@ void NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, k
   post_kernel->set_in_kernels(post_in_kernels);
   post_kernel->set_in_tensors({post_in_tensors});
 }
+
+bool NPUPassUtils::IsNhwc2Nchw(kernel::LiteKernel *kernel) {
+  if (kernel->Type() != schema::PrimitiveType_Transpose) {
+    return false;
+  }
+  auto parameter = reinterpret_cast<TransposeParameter *>(kernel->op_parameter());
+  if (parameter->num_axes_ != 4) {
+    return false;
+  }
+  std::vector<int> perm = {parameter->perm_[0], parameter->perm_[1], parameter->perm_[2], parameter->perm_[3]};
+  std::vector<int> nh2nc_perm = {0, 3, 1, 2};
+  if (nh2nc_perm == perm) {
+    return true;
+  }
+  return false;
+}
+
+bool NPUPassUtils::IsNchw2Nhwc(kernel::LiteKernel *kernel) {
+  if (kernel->Type() != schema::PrimitiveType_Transpose) {
+    return false;
+  }
+  auto parameter = reinterpret_cast<TransposeParameter *>(kernel->op_parameter());
+  if (parameter->num_axes_ != 4) {
+    return false;
+  }
+  std::vector<int> perm = {parameter->perm_[0], parameter->perm_[1], parameter->perm_[2], parameter->perm_[3]};
+  std::vector<int> nc2nh_perm = {0, 2, 3, 1};
+  if (nc2nh_perm == perm) {
+    return true;
+  }
+  return false;
+}
 } // namespace mindspore::lite
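The hard-coded permutations are what let a generic Transpose stand in for the old dedicated layout ops: with out_shape[i] = in_shape[perm[i]], perm {0, 2, 3, 1} maps NCHW to NHWC and {0, 3, 1, 2} maps NHWC back to NCHW, and IsNchw2Nhwc/IsNhwc2Nchw recognize exactly these two patterns. A standalone check of that arithmetic (illustrative only, not part of the patch):

    #include <array>
    #include <cassert>

    int main() {
      const std::array<int, 4> nchw = {1, 3, 224, 224};  // N, C, H, W
      const std::array<int, 4> nc2nh = {0, 2, 3, 1};     // perm_ of the Nchw2Nhwc kernel
      const std::array<int, 4> nh2nc = {0, 3, 1, 2};     // perm_ of the Nhwc2Nchw kernel

      // out_shape[i] = in_shape[perm[i]]
      std::array<int, 4> nhwc{};
      for (int i = 0; i < 4; ++i) nhwc[i] = nchw[nc2nh[i]];
      assert((nhwc == std::array<int, 4>{1, 224, 224, 3}));  // N, H, W, C

      std::array<int, 4> back{};
      for (int i = 0; i < 4; ++i) back[i] = nhwc[nh2nc[i]];
      assert(back == nchw);  // round-trips to NCHW
      return 0;
    }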
@@ -47,10 +47,12 @@ class NPUPassUtils {
   static void UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                               kernel::LiteKernel *post_kernel);
+  static bool IsNhwc2Nchw(kernel::LiteKernel *kernel);
+  static bool IsNchw2Nhwc(kernel::LiteKernel *kernel);
 
  private:
-  static PrimitiveC *CreateNchw2NhwcPrimitive();
-  static PrimitiveC *CreateNhwc2NchwPrimitive();
+  static PrimitiveC *CreateTransposePrimitive();
 };
 }  // namespace mindspore::lite
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
@@ -103,7 +103,6 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
       // input come from npu
       auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel)->GetNPUOp();
       if (npu_op != nullptr) {
-        npu_op->GetOutputDesc(0).GetName();
         node_input_op.push_back(npu_op);
         is_weight_tensor = false;
         break;
@@ -168,14 +167,13 @@ std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }
 
 int SubGraphNpuKernel::Init() {
   if (!is_compiled_) {
-    name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
     auto model_buffer_data = BuildIRModel();
     if (model_buffer_data == nullptr) {
       MS_LOG(ERROR) << "Build IR model failed.";
       return RET_ERROR;
     }
+    name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
     mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data, GetOMModelName(),
                                                          context_->GetNpuInfo().frequency_);
@@ -36,7 +36,7 @@ int CastNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
   }
   op_->set_input_x(*npu_inputs[0]);
   op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->dst_type_)));
-  op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->src_type_)));
+  op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(inputs[0]->data_type())));
   return RET_OK;
 }
@@ -1,3 +1,3 @@
-mobilenet_v1_1.0_224.tflite 1.5
-squeezenet.tflite 1.5
-inception_v3.tflite 0.5
+mobilenet_v1_1.0_224.tflite 2.5
+squeezenet.tflite 2.5
+inception_v3.tflite 1