From: @yangruoqi713 Reviewed-by: Signed-off-by:tags/v1.2.0-rc1
| @@ -15,6 +15,7 @@ | |||
| */ | |||
| #include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h" | |||
| #include <set> | |||
| #include <string> | |||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | |||
| namespace mindspore::lite { | |||
| @@ -42,76 +43,110 @@ int GetInsertState(kernel::LiteKernel *kernel) { | |||
| return InsertNone; | |||
| } | |||
| int NPUInsertTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteKernel *post_kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels) { | |||
| // Kernel and post_kernel can't be nullptr at the same time. | |||
| std::string kernel_name; | |||
| Tensor *in_tensor = nullptr; | |||
| std::vector<kernel::LiteKernel *> out_kernels; | |||
| // If post_kernel equals nullptr, kernel is the output of whole graph. | |||
| if (post_kernel != nullptr) { | |||
| out_kernels.push_back(post_kernel); | |||
| kernel_name = post_kernel->name() + "_pre"; | |||
| in_tensor = post_kernel->in_tensors()[0]; | |||
| } | |||
| std::vector<kernel::LiteKernel *> in_kernels; | |||
| // If kernel equals nullptr, post_kernel is the input of whole graph. | |||
| if (kernel != nullptr) { | |||
| in_kernels.push_back(kernel); | |||
| kernel_name = kernel->name() + "_post"; | |||
| in_tensor = kernel->out_tensors()[0]; | |||
| } | |||
| std::vector<int> nhwc_shape = in_tensor->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| auto nh2nc_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||
| if (nh2nc_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc kernel."; | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor}; | |||
| all_tensors_->push_back(nh2nc_tensors[0]); | |||
| auto nc2nh_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); | |||
| if (nc2nh_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw kernel."; | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor}; | |||
| all_tensors_->push_back(nc2nh_tensors[0]); | |||
| auto nh2nc_name = kernel_name + "_nh2nc_" + std::to_string(total++); | |||
| auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel({in_tensor}, nh2nc_tensors, context_, nh2nc_name); | |||
| trans_kernels->push_back(nh2nc_kernel); | |||
| insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); | |||
| auto nc2nh_name = kernel_name + "_nc2nh_" + std::to_string(total++); | |||
| auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context_, nc2nh_name); | |||
| trans_kernels->push_back(nc2nh_kernel); | |||
| insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); | |||
| NPUPassUtils::UpdateKernel(nh2nc_kernel, in_kernels, {nc2nh_kernel}, {in_tensor}, nh2nc_tensors); | |||
| NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, out_kernels, nh2nc_tensors, nc2nh_tensors); | |||
| if (kernel != nullptr) { | |||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, post_kernel); | |||
| } | |||
| if (post_kernel != nullptr) { | |||
| NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, nc2nh_kernel, post_kernel); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUInsertTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels) { | |||
| if (kernel->in_kernels().size() != kernel->in_tensors().size()) { | |||
| MS_LOG(DEBUG) << "The input tensors of kernel may be the input of whole graph or const tensor."; | |||
| return RET_OK; | |||
| } | |||
| if (kernel->in_kernels().empty()) { | |||
| auto ret = InsertNode(nullptr, kernel, trans_kernels); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name() << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| for (auto in_kernel : kernel->in_kernels()) { | |||
| if (NPUPassUtils::IsNchw2Nhwc(in_kernel)) { | |||
| continue; | |||
| } | |||
| auto nhwc_shape = in_kernel->out_tensors()[0]->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| auto nh2nc_tensor = | |||
| new Tensor(in_kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||
| std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor}; | |||
| all_tensors->push_back(nh2nc_tensors[0]); | |||
| auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); | |||
| std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor}; | |||
| all_tensors->push_back(nc2nh_tensors[0]); | |||
| auto nh2nc_name = in_kernel->name() + "_nh2nc_" + std::to_string(total++); | |||
| auto *nh2nc_kernel = | |||
| NPUPassUtils::CreateNhwc2NchwKernel(in_kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); | |||
| trans_kernels->push_back(nh2nc_kernel); | |||
| insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); | |||
| auto nc2nh_name = in_kernel->name() + "_nc2nh_" + std::to_string(total++); | |||
| auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); | |||
| trans_kernels->push_back(nc2nh_kernel); | |||
| insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); | |||
| NPUPassUtils::UpdateKernel(nh2nc_kernel, {in_kernel}, {nc2nh_kernel}, in_kernel->out_tensors(), nh2nc_tensors); | |||
| NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {kernel}, nh2nc_tensors, nc2nh_tensors); | |||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(in_kernel, nh2nc_kernel, kernel); | |||
| NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(in_kernel, nc2nh_kernel, kernel); | |||
| auto ret = InsertNode(in_kernel, kernel, trans_kernels); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name() << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUInsertTransformPass::InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| int NPUInsertTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels) { | |||
| if (kernel->out_kernels().empty()) { | |||
| auto ret = InsertNode(kernel, nullptr, trans_kernels); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (NPUPassUtils::IsNhwc2Nchw(out_kernel)) { | |||
| continue; | |||
| } | |||
| auto nhwc_shape = kernel->out_tensors()[0]->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| auto nh2nc_tensor = new Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||
| std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor}; | |||
| all_tensors->push_back(nh2nc_tensors[0]); | |||
| auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); | |||
| std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor}; | |||
| all_tensors->push_back(nc2nh_tensors[0]); | |||
| auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++); | |||
| auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); | |||
| trans_kernels->push_back(nh2nc_kernel); | |||
| insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); | |||
| auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++); | |||
| auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); | |||
| trans_kernels->push_back(nc2nh_kernel); | |||
| insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); | |||
| NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors); | |||
| NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors); | |||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel); | |||
| NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, out_kernel); | |||
| auto ret = InsertNode(kernel, out_kernel, trans_kernels); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -123,15 +158,26 @@ int NPUInsertTransformPass::Run() { | |||
| continue; | |||
| } | |||
| auto insert_state = GetInsertState(kernel); | |||
| // If the every output kernel is nhwc2nchw, insert | |||
| // modify loop index add post_kernels.size() to the next kernel in the origin vector | |||
| if (insert_state == PreInsert) { | |||
| std::vector<kernel::LiteKernel *> pre_kernels; | |||
| InsertPreNode(context_, kernel, &pre_kernels, all_tensors_); | |||
| auto ret = InsertPreNodes(kernel, &pre_kernels); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name() << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end()); | |||
| i += pre_kernels.size(); | |||
| } | |||
| if (insert_state == PostInsert) { | |||
| std::vector<kernel::LiteKernel *> post_kernels; | |||
| InsertPostNode(context_, kernel, &post_kernels, all_tensors_); | |||
| auto ret = InsertPostNodes(kernel, &post_kernels); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| all_kernels_->insert(all_kernels_->begin() + i + 1, post_kernels.begin(), post_kernels.end()); | |||
| i += post_kernels.size(); | |||
| } | |||
| @@ -41,11 +41,12 @@ class NPUInsertTransformPass : public NPUBasePass { | |||
| int Run() override; | |||
| private: | |||
| int InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors); | |||
| int InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels); | |||
| int InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors); | |||
| int InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels); | |||
| int InsertNode(kernel::LiteKernel *kernel, kernel::LiteKernel *post_kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels); | |||
| private: | |||
| int total = 0; | |||
| @@ -15,6 +15,7 @@ | |||
| */ | |||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | |||
| #include "src/runtime/agent/npu/npu_manager.h" | |||
| #include "src/ops/transpose.h" | |||
| #include "nnacl/transpose.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| @@ -120,76 +121,80 @@ void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<ke | |||
| void NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *kernel) { | |||
| std::vector<kernel::LiteKernel *> out_kernels; | |||
| for (auto out_kernel : pre_kernel->out_kernels()) { | |||
| if (out_kernel == kernel) { | |||
| out_kernels.push_back(trans_kernel); | |||
| } else { | |||
| out_kernels.push_back(out_kernel); | |||
| // For kernel before trans, update the out_kernels; the output tensor of kernel is the input tensor of trans. | |||
| std::vector<kernel::LiteKernel *> out_kernels = pre_kernel->out_kernels(); | |||
| for (size_t i = 0; i < out_kernels.size(); i++) { | |||
| if (out_kernels[i] == kernel) { | |||
| out_kernels[i] = trans_kernel; | |||
| break; | |||
| } | |||
| } | |||
| pre_kernel->set_out_kernels(out_kernels); | |||
| } | |||
| void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *post_kernel) { | |||
| std::vector<kernel::LiteKernel *> cur_out_kernels; | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (out_kernel == post_kernel) { | |||
| cur_out_kernels.push_back(trans_kernel); | |||
| } else { | |||
| cur_out_kernels.push_back(out_kernel); | |||
| void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | |||
| std::vector<kernel::LiteKernel *> kernels) { | |||
| // For kernel before trans, there may be multiple outputs. | |||
| auto cur_out_kernels = pre_kernel->out_kernels(); | |||
| for (size_t i = 0; i < kernels.size(); i++) { | |||
| auto itr = find(cur_out_kernels.begin(), cur_out_kernels.end(), kernels[i]); | |||
| if (itr != cur_out_kernels.end()) { | |||
| cur_out_kernels.erase(itr); | |||
| } | |||
| } | |||
| auto kernel_out_tensor = kernel->out_tensors()[0]; | |||
| // Change format the output of the current kernel nhwc->nchw | |||
| auto nhwc_shape = kernel_out_tensor->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| kernel_out_tensor->set_format(schema::Format_NCHW); | |||
| kernel_out_tensor->set_shape(nchw_shape); | |||
| kernel->set_out_kernels(cur_out_kernels); | |||
| kernel->set_out_tensors({kernel_out_tensor}); | |||
| cur_out_kernels.push_back(trans_kernel); | |||
| pre_kernel->set_out_kernels(cur_out_kernels); | |||
| // For kernel before trans, the output tensor is used for output tensor of trans, so replace the output tensor with | |||
| // the input tensor of trans. | |||
| pre_kernel->set_out_tensors(trans_kernel->in_tensors()); | |||
| } | |||
| void NPUPassUtils::UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel) { | |||
| auto cur_in_tensors = post_kernel->in_tensors(); | |||
| cur_in_tensors[0] = trans_kernel->out_tensors()[0]; | |||
| post_kernel->set_in_tensors(cur_in_tensors); | |||
| post_kernel->set_in_kernels({trans_kernel}); | |||
| } | |||
| void NPUPassUtils::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *pre_kernel) { | |||
| std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]}; | |||
| for (int i = 1; i < kernel->in_tensors().size(); i++) { | |||
| cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]); | |||
| } | |||
| std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel}; | |||
| for (int i = 1; i < kernel->in_kernels().size(); i++) { | |||
| auto in_kernel = kernel->in_kernels()[i]; | |||
| if (in_kernel != kernel) { | |||
| cur_in_kernels.push_back(in_kernel); | |||
| void NPUPassUtils::UpdateNC2NHPostKernelInTensors(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *post_kernel) { | |||
| // For post_kernel that doesn't require insert trans kernel, because the output tensor of kernel(input tensor of | |||
| // trans_kernel) is updated, replace the input tensor of post_kernel. | |||
| auto post_in_tensors = post_kernel->in_tensors(); | |||
| for (size_t i = 0; i < post_in_tensors.size(); i++) { | |||
| if (post_in_tensors[i] == kernel->out_tensors()[0]) { | |||
| post_in_tensors[i] = trans_kernel->in_tensors()[0]; | |||
| break; | |||
| } | |||
| } | |||
| kernel->set_in_kernels(cur_in_kernels); | |||
| kernel->set_in_tensors({cur_kernel_in_tensors}); | |||
| post_kernel->set_in_tensors(post_in_tensors); | |||
| } | |||
| void NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *post_kernel) { | |||
| std::vector<Tensor *> post_in_tensors; | |||
| for (auto post_in_tensor : post_kernel->in_tensors()) { | |||
| if (post_in_tensor != kernel->out_tensors()[0]) { | |||
| post_in_tensors.push_back(post_in_tensor); | |||
| } else { | |||
| post_in_tensors.push_back(trans_kernel->out_tensors()[0]); | |||
| void NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *post_kernel) { | |||
| // For post_kernel after trans, kernel should be replaced with trans_kernel. | |||
| auto post_in_tensors = post_kernel->in_tensors(); | |||
| if (kernel == nullptr) { | |||
| post_in_tensors[0] = trans_kernel->out_tensors()[0]; | |||
| } else { | |||
| for (size_t i = 0; i < post_in_tensors.size(); i++) { | |||
| if (post_in_tensors[i] == kernel->out_tensors()[0]) { | |||
| post_in_tensors[i] = trans_kernel->out_tensors()[0]; | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| post_kernel->set_in_tensors(post_in_tensors); | |||
| std::vector<kernel::LiteKernel *> post_in_kernels; | |||
| for (auto in_kernel : post_kernel->in_kernels()) { | |||
| if (in_kernel == kernel) { | |||
| post_in_kernels.push_back(trans_kernel); | |||
| } else { | |||
| post_in_kernels.push_back(in_kernel); | |||
| // The input tensor should be replaced with the output tensor of trans_kernel. | |||
| std::vector<kernel::LiteKernel *> post_in_kernels = post_kernel->in_kernels(); | |||
| for (size_t i = 0; i < post_in_kernels.size(); i++) { | |||
| if (post_in_kernels[i] == kernel) { | |||
| post_in_kernels[i] = trans_kernel; | |||
| break; | |||
| } | |||
| } | |||
| post_kernel->set_in_kernels(post_in_kernels); | |||
| post_kernel->set_in_tensors({post_in_tensors}); | |||
| } | |||
| bool NPUPassUtils::IsNhwc2Nchw(const kernel::LiteKernel *kernel) { | |||
| @@ -38,14 +38,16 @@ class NPUPassUtils { | |||
| static void UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *kernel); | |||
| static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *post_kernel); | |||
| static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | |||
| std::vector<kernel::LiteKernel *> kernels); | |||
| static void UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *pre_kernel); | |||
| static void UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel); | |||
| static void UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *post_kernel); | |||
| static void UpdateNC2NHTransNodePostKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *post_kernel); | |||
| static void UpdateNC2NHPostKernelInTensors(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *post_kernel); | |||
| static bool IsNhwc2Nchw(const kernel::LiteKernel *kernel); | |||
| @@ -20,10 +20,9 @@ | |||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kNPU; | |||
| int NPUTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels) { | |||
| bool is_input_kernel = kernel->in_kernels().empty(); | |||
| // single input | |||
| if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU || | |||
| npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) { | |||
| kernel::LiteKernel *pre_kernel = nullptr; | |||
| @@ -34,69 +33,86 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKer | |||
| // Create pre transform kernel's out tensor. | |||
| auto nhwc_shape = kernel->in_tensors()[0]->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR); | |||
| auto tensor = | |||
| new (std::nothrow) Tensor(kernel->in_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR); | |||
| if (tensor == nullptr) { | |||
| MS_LOG(ERROR) << "New nchw tensor failed when inserting pre nhwc2nchw kernel."; | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<Tensor *> pre_trans_out_tensors = {tensor}; | |||
| all_tensors->push_back(pre_trans_out_tensors[0]); | |||
| all_tensors_->push_back(pre_trans_out_tensors[0]); | |||
| // Create pre transform kernel: Nhwc2Nchw | |||
| auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++); | |||
| auto *trans_kernel = | |||
| NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name); | |||
| NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context_, name); | |||
| trans_kernels->push_back(trans_kernel); | |||
| insert_primitive_.push_back(trans_kernel->GetPrimitive()); | |||
| // Set in_kernels, out_kernels, in_tensors,out_tensors for transform kernel | |||
| std::vector<kernel::LiteKernel *> pre_trans_in_kernel; | |||
| if (is_input_kernel) { | |||
| pre_trans_in_kernel = {}; | |||
| } else { | |||
| pre_trans_in_kernel = {pre_kernel}; | |||
| // Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel | |||
| std::vector<kernel::LiteKernel *> pre_trans_in_kernels; | |||
| if (!is_input_kernel) { | |||
| pre_trans_in_kernels = {pre_kernel}; | |||
| } | |||
| NPUPassUtils::UpdateKernel(trans_kernel, pre_trans_in_kernel, {kernel}, {kernel->in_tensors()[0]}, | |||
| NPUPassUtils::UpdateKernel(trans_kernel, pre_trans_in_kernels, {kernel}, {kernel->in_tensors()[0]}, | |||
| pre_trans_out_tensors); | |||
| if (pre_kernel != nullptr) { | |||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(pre_kernel, trans_kernel, kernel); | |||
| } | |||
| NPUPassUtils::UpdateNH2NCTransNodeAfterKernel(kernel, trans_kernel, pre_kernel); | |||
| NPUPassUtils::UpdateNH2NCTransNodePostKernel(trans_kernel, kernel); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| // Model output does not insert operator | |||
| if (kernel->out_kernels().empty()) { | |||
| return RET_OK; | |||
| } | |||
| // Single output multiple references | |||
| int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels) { | |||
| bool is_output_kernel = kernel->out_kernels().empty(); | |||
| // Get the post kernel that need insert trans kernel. | |||
| // If no need for inserting trans kernel, the post kernel must be npu and in trans_nodes. | |||
| std::vector<kernel::LiteKernel *> post_insert_kernels; | |||
| for (int i = 0; i < kernel->out_kernels().size(); i++) { | |||
| auto post_kernel = kernel->out_kernels().at(i); | |||
| if (post_kernel->desc().arch == kNPU && npu_trans_nodes.find(post_kernel->Type()) != npu_trans_nodes.end()) { | |||
| continue; | |||
| auto post_kernel = kernel->out_kernels()[i]; | |||
| if (post_kernel->desc().arch != kNPU || npu_trans_nodes.find(post_kernel->Type()) == npu_trans_nodes.end()) { | |||
| post_insert_kernels.push_back(post_kernel); | |||
| } | |||
| // Create post transform kernel's out tensor. | |||
| auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), kernel->out_tensors()[0]->shape(), | |||
| schema::Format_NHWC, Tensor::VAR); | |||
| std::vector<Tensor *> post_trans_out_tensors = {tensor}; | |||
| all_tensors->push_back(post_trans_out_tensors[0]); | |||
| } | |||
| if (is_output_kernel || !post_insert_kernels.empty()) { | |||
| // Create post transform kernel's in tensor. | |||
| auto nhwc_shape = kernel->out_tensors()[0]->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| auto tensor = | |||
| new (std::nothrow) Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||
| if (tensor == nullptr) { | |||
| MS_LOG(ERROR) << "New nchw tensor failed when inserting post nchw2nhwc kernel."; | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<Tensor *> post_trans_in_tensors = {tensor}; | |||
| all_tensors_->push_back(tensor); | |||
| auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++); | |||
| tensor->set_tensor_name(name + "/input0"); | |||
| // Create post transform kernel: Nchw2Nhwc | |||
| auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++); | |||
| auto *post_trans_kernel = | |||
| NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors, context, name); | |||
| NPUPassUtils::CreateNchw2NhwcKernel(post_trans_in_tensors, kernel->out_tensors(), context_, name); | |||
| // Set in_kernels, out_kernels, in_tensors,out_tensors for transform kernel | |||
| NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {post_kernel}, kernel->out_tensors(), | |||
| post_trans_out_tensors); | |||
| // Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel | |||
| NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, post_insert_kernels, post_trans_in_tensors, | |||
| kernel->out_tensors()); | |||
| insert_primitive_.push_back(post_trans_kernel->GetPrimitive()); | |||
| trans_kernels->push_back(post_trans_kernel); | |||
| NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, post_kernel); | |||
| NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, post_kernel); | |||
| if (!is_output_kernel) { | |||
| for (int i = 0; i < kernel->out_kernels().size(); i++) { | |||
| auto post_kernel = kernel->out_kernels()[i]; | |||
| if (find(post_insert_kernels.begin(), post_insert_kernels.end(), post_kernel) != post_insert_kernels.end()) { | |||
| NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, post_trans_kernel, post_kernel); | |||
| } else { | |||
| NPUPassUtils::UpdateNC2NHPostKernelInTensors(kernel, post_trans_kernel, post_kernel); | |||
| } | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, post_insert_kernels); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -108,13 +124,25 @@ int NPUTransformPass::Run() { | |||
| i++; | |||
| continue; | |||
| } | |||
| // insert pre_kernels before kernel in vector | |||
| // modify loop index add (pre_kernels.size() + 1) to the post_kernels insert location | |||
| std::vector<kernel::LiteKernel *> pre_kernels; | |||
| InsertPreNode(context_, kernel, &pre_kernels, all_tensors_); | |||
| auto ret = InsertPreNodes(kernel, &pre_kernels); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel before kernel " << kernel->name() << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end()); | |||
| i += (pre_kernels.size() + 1); | |||
| // insert post_kernels after kernel in vector | |||
| // modify loop index add post_kernels.size() to the next kernel in the origin vector | |||
| std::vector<kernel::LiteKernel *> post_kernels; | |||
| InsertPostNode(context_, kernel, &post_kernels, all_tensors_); | |||
| ret = InsertPostNodes(kernel, &post_kernels); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Insert nchw2nhwc kernel after kernel " << kernel->name() << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| all_kernels_->insert(all_kernels_->begin() + i, post_kernels.begin(), post_kernels.end()); | |||
| i += post_kernels.size(); | |||
| } | |||
| @@ -42,11 +42,9 @@ class NPUTransformPass : public NPUBasePass { | |||
| } | |||
| private: | |||
| int InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors); | |||
| int InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels); | |||
| int InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors); | |||
| int InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels); | |||
| private: | |||
| int total = 0; | |||
| @@ -21,6 +21,9 @@ | |||
| using mindspore::kernel::KERNEL_ARCH::kNPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::ActivationType_NO_ACTIVATION; | |||
| using mindspore::schema::ActivationType_RELU; | |||
| using mindspore::schema::ActivationType_RELU6; | |||
| using mindspore::schema::PrimitiveType_Add; | |||
| using mindspore::schema::PrimitiveType_Div; | |||
| using mindspore::schema::PrimitiveType_Equal; | |||
| @@ -118,7 +121,6 @@ int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| case PrimitiveType_GreaterEqual: | |||
| op = CreateOperator<hiai::op::GreaterEqual>(npu_inputs, name_); | |||
| break; | |||
| default: | |||
| MS_LOG(ERROR) << "Unsupported primitive type:" | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive_->Type())); | |||
| @@ -129,16 +131,42 @@ int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| return RET_ERROR; | |||
| } | |||
| op_ = op; | |||
| if (activation_type_ != ActivationType_NO_ACTIVATION) { | |||
| act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act"); | |||
| if (act_ == nullptr) { | |||
| MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| act_->set_input_x(*op_); | |||
| if (activation_type_ == ActivationType_RELU) { | |||
| act_->set_attr_mode(1); | |||
| } else if (activation_type_ == ActivationType_RELU6) { | |||
| act_->set_attr_mode(14); | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupport activation type for op " << name_; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ArithmeticNPUKernel::GetNPUOp() { return this->op_; } | |||
| ge::Operator *mindspore::kernel::ArithmeticNPUKernel::GetNPUOp() { | |||
| if (activation_type_ == ActivationType_NO_ACTIVATION) { | |||
| return op_; | |||
| } | |||
| return act_; | |||
| } | |||
| ArithmeticNPUKernel::~ArithmeticNPUKernel() { | |||
| if (op_ != nullptr) { | |||
| delete op_; | |||
| op_ = nullptr; | |||
| } | |||
| if (act_ != nullptr) { | |||
| delete act_; | |||
| act_ = nullptr; | |||
| } | |||
| } | |||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Mul, NPUKernelCreator<ArithmeticNPUKernel>) | |||
| @@ -17,15 +17,18 @@ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | |||
| #include <vector> | |||
| #include "nnacl/arithmetic.h" | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/math_defs.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| namespace mindspore::kernel { | |||
| class ArithmeticNPUKernel : public NPUKernel { | |||
| public: | |||
| ArithmeticNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| activation_type_ = reinterpret_cast<ArithmeticParameter *>(parameter)->activation_type_; | |||
| } | |||
| ~ArithmeticNPUKernel() override; | |||
| int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| @@ -36,7 +39,9 @@ class ArithmeticNPUKernel : public NPUKernel { | |||
| ge::Operator *GetNPUOp() override; | |||
| private: | |||
| int activation_type_; | |||
| ge::Operator *op_ = nullptr; | |||
| hiai::op::Activation *act_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | |||
| @@ -0,0 +1,57 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/npu/squeeze_npu.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kNPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_Squeeze; | |||
| namespace mindspore::kernel { | |||
// Reports whether this kernel can handle the given tensors and parameter.
// No checks are performed here: every configuration is accepted and RET_OK is
// returned unconditionally.
int SqueezeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                OpParameter *opParameter) {
  return RET_OK;
}
| int SqueezeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| op_ = new (std::nothrow) hiai::op::Squeeze(name_); | |||
| if (op_ == nullptr) { | |||
| MS_LOG(ERROR) << "New squeeze npu operator for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<int64_t> axes; | |||
| for (int i = 0; i < axes_.size(); i++) { | |||
| axes.push_back(axes_[i]); | |||
| } | |||
| op_->set_input_x(*npu_inputs[0]); | |||
| op_->set_attr_axis(axes); | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::SqueezeNPUKernel::GetNPUOp() { return this->op_; } | |||
| SqueezeNPUKernel::~SqueezeNPUKernel() { | |||
| if (op_ != nullptr) { | |||
| delete op_; | |||
| op_ = nullptr; | |||
| } | |||
| } | |||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Squeeze, NPUKernelCreator<SqueezeNPUKernel>) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,46 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SQUEEZE_NPU_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SQUEEZE_NPU_H_ | |||
| #include <vector> | |||
| #include "src/ops/squeeze.h" | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| namespace mindspore::kernel { | |||
| class SqueezeNPUKernel : public NPUKernel { | |||
| public: | |||
| SqueezeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| auto squeeze = reinterpret_cast<const mindspore::lite::Squeeze *>(primitive); | |||
| axes_ = squeeze->GetAxis(); | |||
| } | |||
| ~SqueezeNPUKernel() override; | |||
| int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| OpParameter *opParameter) override; | |||
| int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) override; | |||
| ge::Operator *GetNPUOp() override; | |||
| private: | |||
| hiai::op::Squeeze *op_ = nullptr; | |||
| vector<int> axes_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SQUEEZE_NPU_H_ | |||
| @@ -16,6 +16,7 @@ mobilenet_v1_1.0_192.tflite 6 | |||
| mobilenet_v1_1.0_224.tflite 2.5 | |||
| mobilenet_v2_1.0_224.tflite 2.5 | |||
| squeezenet.tflite 2.5 | |||
| inception_resnet_v2.tflite 2 | |||
| inception_v3.tflite 1 | |||
| inception_v4.tflite 0.5 | |||
| efficientnet_lite0_fp32_2.tflite 1 | |||
| @@ -23,6 +24,7 @@ efficientnet_lite1_fp32_2.tflite 1 | |||
| efficientnet_lite2_fp32_2.tflite 1 | |||
| efficientnet_lite3_fp32_2.tflite 1 | |||
| efficientnet_lite4_fp32_2.tflite 1 | |||
| deeplabv3_1_default_1.tflite 2.5 | |||
| 6c_seg_nomean_20200610 1.5 | |||
| ml_video_edit_person_divison 0.5 | |||
| porseg_tmp.onnx 1 2 | |||