From: @yangruoqi713 Reviewed-by: Signed-off-by:tags/v1.2.0-rc1
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h" | #include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h" | ||||
| #include <set> | #include <set> | ||||
| #include <string> | |||||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | ||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| @@ -42,76 +43,110 @@ int GetInsertState(kernel::LiteKernel *kernel) { | |||||
| return InsertNone; | return InsertNone; | ||||
| } | } | ||||
| int NPUInsertTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels, | |||||
| std::vector<Tensor *> *all_tensors) { | |||||
| int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteKernel *post_kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels) { | |||||
| // Kernel and post_kernel can't be nullptr at the same time. | |||||
| std::string kernel_name; | |||||
| Tensor *in_tensor = nullptr; | |||||
| std::vector<kernel::LiteKernel *> out_kernels; | |||||
| // If post_kernel equals nullptr, kernel is the output of whole graph. | |||||
| if (post_kernel != nullptr) { | |||||
| out_kernels.push_back(post_kernel); | |||||
| kernel_name = post_kernel->name() + "_pre"; | |||||
| in_tensor = post_kernel->in_tensors()[0]; | |||||
| } | |||||
| std::vector<kernel::LiteKernel *> in_kernels; | |||||
| // If kernel equals nullptr, post_kernel is the input of whole graph. | |||||
| if (kernel != nullptr) { | |||||
| in_kernels.push_back(kernel); | |||||
| kernel_name = kernel->name() + "_post"; | |||||
| in_tensor = kernel->out_tensors()[0]; | |||||
| } | |||||
| std::vector<int> nhwc_shape = in_tensor->shape(); | |||||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||||
| auto nh2nc_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||||
| if (nh2nc_tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc kernel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor}; | |||||
| all_tensors_->push_back(nh2nc_tensors[0]); | |||||
| auto nc2nh_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); | |||||
| if (nc2nh_tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw kernel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor}; | |||||
| all_tensors_->push_back(nc2nh_tensors[0]); | |||||
| auto nh2nc_name = kernel_name + "_nh2nc_" + std::to_string(total++); | |||||
| auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel({in_tensor}, nh2nc_tensors, context_, nh2nc_name); | |||||
| trans_kernels->push_back(nh2nc_kernel); | |||||
| insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); | |||||
| auto nc2nh_name = kernel_name + "_nc2nh_" + std::to_string(total++); | |||||
| auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context_, nc2nh_name); | |||||
| trans_kernels->push_back(nc2nh_kernel); | |||||
| insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); | |||||
| NPUPassUtils::UpdateKernel(nh2nc_kernel, in_kernels, {nc2nh_kernel}, {in_tensor}, nh2nc_tensors); | |||||
| NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, out_kernels, nh2nc_tensors, nc2nh_tensors); | |||||
| if (kernel != nullptr) { | |||||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, post_kernel); | |||||
| } | |||||
| if (post_kernel != nullptr) { | |||||
| NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, nc2nh_kernel, post_kernel); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int NPUInsertTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels) { | |||||
| if (kernel->in_kernels().size() != kernel->in_tensors().size()) { | |||||
| MS_LOG(DEBUG) << "The input tensors of kernel may be the input of whole graph or const tensor."; | |||||
| return RET_OK; | |||||
| } | |||||
| if (kernel->in_kernels().empty()) { | |||||
| auto ret = InsertNode(nullptr, kernel, trans_kernels); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name() << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| for (auto in_kernel : kernel->in_kernels()) { | for (auto in_kernel : kernel->in_kernels()) { | ||||
| if (NPUPassUtils::IsNchw2Nhwc(in_kernel)) { | if (NPUPassUtils::IsNchw2Nhwc(in_kernel)) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| auto nhwc_shape = in_kernel->out_tensors()[0]->shape(); | |||||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||||
| auto nh2nc_tensor = | |||||
| new Tensor(in_kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||||
| std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor}; | |||||
| all_tensors->push_back(nh2nc_tensors[0]); | |||||
| auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); | |||||
| std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor}; | |||||
| all_tensors->push_back(nc2nh_tensors[0]); | |||||
| auto nh2nc_name = in_kernel->name() + "_nh2nc_" + std::to_string(total++); | |||||
| auto *nh2nc_kernel = | |||||
| NPUPassUtils::CreateNhwc2NchwKernel(in_kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); | |||||
| trans_kernels->push_back(nh2nc_kernel); | |||||
| insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); | |||||
| auto nc2nh_name = in_kernel->name() + "_nc2nh_" + std::to_string(total++); | |||||
| auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); | |||||
| trans_kernels->push_back(nc2nh_kernel); | |||||
| insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); | |||||
| NPUPassUtils::UpdateKernel(nh2nc_kernel, {in_kernel}, {nc2nh_kernel}, in_kernel->out_tensors(), nh2nc_tensors); | |||||
| NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {kernel}, nh2nc_tensors, nc2nh_tensors); | |||||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(in_kernel, nh2nc_kernel, kernel); | |||||
| NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(in_kernel, nc2nh_kernel, kernel); | |||||
| auto ret = InsertNode(in_kernel, kernel, trans_kernels); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name() << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int NPUInsertTransformPass::InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels, | |||||
| std::vector<Tensor *> *all_tensors) { | |||||
| int NPUInsertTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels) { | |||||
| if (kernel->out_kernels().empty()) { | |||||
| auto ret = InsertNode(kernel, nullptr, trans_kernels); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| for (auto out_kernel : kernel->out_kernels()) { | for (auto out_kernel : kernel->out_kernels()) { | ||||
| if (NPUPassUtils::IsNhwc2Nchw(out_kernel)) { | if (NPUPassUtils::IsNhwc2Nchw(out_kernel)) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| auto nhwc_shape = kernel->out_tensors()[0]->shape(); | |||||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||||
| auto nh2nc_tensor = new Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||||
| std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor}; | |||||
| all_tensors->push_back(nh2nc_tensors[0]); | |||||
| auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); | |||||
| std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor}; | |||||
| all_tensors->push_back(nc2nh_tensors[0]); | |||||
| auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++); | |||||
| auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); | |||||
| trans_kernels->push_back(nh2nc_kernel); | |||||
| insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); | |||||
| auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++); | |||||
| auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); | |||||
| trans_kernels->push_back(nc2nh_kernel); | |||||
| insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); | |||||
| NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors); | |||||
| NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors); | |||||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel); | |||||
| NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, out_kernel); | |||||
| auto ret = InsertNode(kernel, out_kernel, trans_kernels); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -123,15 +158,26 @@ int NPUInsertTransformPass::Run() { | |||||
| continue; | continue; | ||||
| } | } | ||||
| auto insert_state = GetInsertState(kernel); | auto insert_state = GetInsertState(kernel); | ||||
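| // Depending on insert_state (e.g. when every output kernel is nhwc2nchw), insert nhwc2nchw and nchw2nhwc kernels, | |||||
| // then add the number of inserted kernels to the loop index so that it moves to the next kernel in the original vector. | |||||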
| if (insert_state == PreInsert) { | if (insert_state == PreInsert) { | ||||
| std::vector<kernel::LiteKernel *> pre_kernels; | std::vector<kernel::LiteKernel *> pre_kernels; | ||||
| InsertPreNode(context_, kernel, &pre_kernels, all_tensors_); | |||||
| auto ret = InsertPreNodes(kernel, &pre_kernels); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name() << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end()); | all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end()); | ||||
| i += pre_kernels.size(); | i += pre_kernels.size(); | ||||
| } | } | ||||
| if (insert_state == PostInsert) { | if (insert_state == PostInsert) { | ||||
| std::vector<kernel::LiteKernel *> post_kernels; | std::vector<kernel::LiteKernel *> post_kernels; | ||||
| InsertPostNode(context_, kernel, &post_kernels, all_tensors_); | |||||
| auto ret = InsertPostNodes(kernel, &post_kernels); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| all_kernels_->insert(all_kernels_->begin() + i + 1, post_kernels.begin(), post_kernels.end()); | all_kernels_->insert(all_kernels_->begin() + i + 1, post_kernels.begin(), post_kernels.end()); | ||||
| i += post_kernels.size(); | i += post_kernels.size(); | ||||
| } | } | ||||
| @@ -41,11 +41,12 @@ class NPUInsertTransformPass : public NPUBasePass { | |||||
| int Run() override; | int Run() override; | ||||
| private: | private: | ||||
| int InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors); | |||||
| int InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels); | |||||
| int InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors); | |||||
| int InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels); | |||||
| int InsertNode(kernel::LiteKernel *kernel, kernel::LiteKernel *post_kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels); | |||||
| private: | private: | ||||
| int total = 0; | int total = 0; | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | ||||
| #include "src/runtime/agent/npu/npu_manager.h" | |||||
| #include "src/ops/transpose.h" | #include "src/ops/transpose.h" | ||||
| #include "nnacl/transpose.h" | #include "nnacl/transpose.h" | ||||
| #include "src/ops/populate/populate_register.h" | #include "src/ops/populate/populate_register.h" | ||||
| @@ -120,76 +121,80 @@ void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<ke | |||||
| void NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | void NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | ||||
| kernel::LiteKernel *kernel) { | kernel::LiteKernel *kernel) { | ||||
| std::vector<kernel::LiteKernel *> out_kernels; | |||||
| for (auto out_kernel : pre_kernel->out_kernels()) { | |||||
| if (out_kernel == kernel) { | |||||
| out_kernels.push_back(trans_kernel); | |||||
| } else { | |||||
| out_kernels.push_back(out_kernel); | |||||
| // For kernel before trans, update the out_kernels; the output tensor of kernel is the input tensor of trans. | |||||
| std::vector<kernel::LiteKernel *> out_kernels = pre_kernel->out_kernels(); | |||||
| for (size_t i = 0; i < out_kernels.size(); i++) { | |||||
| if (out_kernels[i] == kernel) { | |||||
| out_kernels[i] = trans_kernel; | |||||
| break; | |||||
| } | } | ||||
| } | } | ||||
| pre_kernel->set_out_kernels(out_kernels); | pre_kernel->set_out_kernels(out_kernels); | ||||
| } | } | ||||
| void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *post_kernel) { | |||||
| std::vector<kernel::LiteKernel *> cur_out_kernels; | |||||
| for (auto out_kernel : kernel->out_kernels()) { | |||||
| if (out_kernel == post_kernel) { | |||||
| cur_out_kernels.push_back(trans_kernel); | |||||
| } else { | |||||
| cur_out_kernels.push_back(out_kernel); | |||||
| void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | |||||
| std::vector<kernel::LiteKernel *> kernels) { | |||||
| // For kernel before trans, there may be multiple outputs. | |||||
| auto cur_out_kernels = pre_kernel->out_kernels(); | |||||
| for (size_t i = 0; i < kernels.size(); i++) { | |||||
| auto itr = find(cur_out_kernels.begin(), cur_out_kernels.end(), kernels[i]); | |||||
| if (itr != cur_out_kernels.end()) { | |||||
| cur_out_kernels.erase(itr); | |||||
| } | } | ||||
| } | } | ||||
| auto kernel_out_tensor = kernel->out_tensors()[0]; | |||||
| // Change format the output of the current kernel nhwc->nchw | |||||
| auto nhwc_shape = kernel_out_tensor->shape(); | |||||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||||
| kernel_out_tensor->set_format(schema::Format_NCHW); | |||||
| kernel_out_tensor->set_shape(nchw_shape); | |||||
| kernel->set_out_kernels(cur_out_kernels); | |||||
| kernel->set_out_tensors({kernel_out_tensor}); | |||||
| cur_out_kernels.push_back(trans_kernel); | |||||
| pre_kernel->set_out_kernels(cur_out_kernels); | |||||
| // The original output tensor of pre_kernel is now used as the output tensor of trans_kernel, so the output tensors | |||||
| // of pre_kernel are replaced with the input tensors of trans_kernel. | |||||
| pre_kernel->set_out_tensors(trans_kernel->in_tensors()); | |||||
| } | |||||
| void NPUPassUtils::UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel) { | |||||
| auto cur_in_tensors = post_kernel->in_tensors(); | |||||
| cur_in_tensors[0] = trans_kernel->out_tensors()[0]; | |||||
| post_kernel->set_in_tensors(cur_in_tensors); | |||||
| post_kernel->set_in_kernels({trans_kernel}); | |||||
| } | } | ||||
| void NPUPassUtils::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *pre_kernel) { | |||||
| std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]}; | |||||
| for (int i = 1; i < kernel->in_tensors().size(); i++) { | |||||
| cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]); | |||||
| } | |||||
| std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel}; | |||||
| for (int i = 1; i < kernel->in_kernels().size(); i++) { | |||||
| auto in_kernel = kernel->in_kernels()[i]; | |||||
| if (in_kernel != kernel) { | |||||
| cur_in_kernels.push_back(in_kernel); | |||||
| void NPUPassUtils::UpdateNC2NHPostKernelInTensors(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *post_kernel) { | |||||
| // For a post_kernel that does not need a trans kernel inserted: the output tensor of kernel has been replaced by the | |||||
| // input tensor of trans_kernel, so the matching input tensor of post_kernel must be replaced with it as well. | |||||
| auto post_in_tensors = post_kernel->in_tensors(); | |||||
| for (size_t i = 0; i < post_in_tensors.size(); i++) { | |||||
| if (post_in_tensors[i] == kernel->out_tensors()[0]) { | |||||
| post_in_tensors[i] = trans_kernel->in_tensors()[0]; | |||||
| break; | |||||
| } | } | ||||
| } | } | ||||
| kernel->set_in_kernels(cur_in_kernels); | |||||
| kernel->set_in_tensors({cur_kernel_in_tensors}); | |||||
| post_kernel->set_in_tensors(post_in_tensors); | |||||
| } | } | ||||
| void NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *post_kernel) { | |||||
| std::vector<Tensor *> post_in_tensors; | |||||
| for (auto post_in_tensor : post_kernel->in_tensors()) { | |||||
| if (post_in_tensor != kernel->out_tensors()[0]) { | |||||
| post_in_tensors.push_back(post_in_tensor); | |||||
| } else { | |||||
| post_in_tensors.push_back(trans_kernel->out_tensors()[0]); | |||||
| void NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *post_kernel) { | |||||
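| // For post_kernel after trans, the input tensor that was the output of kernel (or the first input tensor when kernel | |||||
| // is nullptr) should be replaced with the output tensor of trans_kernel. | |||||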
| auto post_in_tensors = post_kernel->in_tensors(); | |||||
| if (kernel == nullptr) { | |||||
| post_in_tensors[0] = trans_kernel->out_tensors()[0]; | |||||
| } else { | |||||
| for (size_t i = 0; i < post_in_tensors.size(); i++) { | |||||
| if (post_in_tensors[i] == kernel->out_tensors()[0]) { | |||||
| post_in_tensors[i] = trans_kernel->out_tensors()[0]; | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| post_kernel->set_in_tensors(post_in_tensors); | post_kernel->set_in_tensors(post_in_tensors); | ||||
| std::vector<kernel::LiteKernel *> post_in_kernels; | |||||
| for (auto in_kernel : post_kernel->in_kernels()) { | |||||
| if (in_kernel == kernel) { | |||||
| post_in_kernels.push_back(trans_kernel); | |||||
| } else { | |||||
| post_in_kernels.push_back(in_kernel); | |||||
| // Among the input kernels of post_kernel, kernel should be replaced with trans_kernel. | |||||
| std::vector<kernel::LiteKernel *> post_in_kernels = post_kernel->in_kernels(); | |||||
| for (size_t i = 0; i < post_in_kernels.size(); i++) { | |||||
| if (post_in_kernels[i] == kernel) { | |||||
| post_in_kernels[i] = trans_kernel; | |||||
| break; | |||||
| } | } | ||||
| } | } | ||||
| post_kernel->set_in_kernels(post_in_kernels); | post_kernel->set_in_kernels(post_in_kernels); | ||||
| post_kernel->set_in_tensors({post_in_tensors}); | |||||
| } | } | ||||
| bool NPUPassUtils::IsNhwc2Nchw(const kernel::LiteKernel *kernel) { | bool NPUPassUtils::IsNhwc2Nchw(const kernel::LiteKernel *kernel) { | ||||
| @@ -38,14 +38,16 @@ class NPUPassUtils { | |||||
| static void UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | static void UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | ||||
| kernel::LiteKernel *kernel); | kernel::LiteKernel *kernel); | ||||
| static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *post_kernel); | |||||
| static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | |||||
| std::vector<kernel::LiteKernel *> kernels); | |||||
| static void UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *pre_kernel); | |||||
| static void UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel); | |||||
| static void UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *post_kernel); | |||||
| static void UpdateNC2NHTransNodePostKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *post_kernel); | |||||
| static void UpdateNC2NHPostKernelInTensors(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||||
| kernel::LiteKernel *post_kernel); | |||||
| static bool IsNhwc2Nchw(const kernel::LiteKernel *kernel); | static bool IsNhwc2Nchw(const kernel::LiteKernel *kernel); | ||||
| @@ -20,10 +20,9 @@ | |||||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | ||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| using kernel::KERNEL_ARCH::kNPU; | using kernel::KERNEL_ARCH::kNPU; | ||||
| int NPUTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels, | |||||
| std::vector<Tensor *> *all_tensors) { | |||||
| int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels) { | |||||
| bool is_input_kernel = kernel->in_kernels().empty(); | bool is_input_kernel = kernel->in_kernels().empty(); | ||||
| // single input | |||||
| if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU || | if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU || | ||||
| npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) { | npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) { | ||||
| kernel::LiteKernel *pre_kernel = nullptr; | kernel::LiteKernel *pre_kernel = nullptr; | ||||
| @@ -34,69 +33,86 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKer | |||||
| // Create pre transform kernel's out tensor. | // Create pre transform kernel's out tensor. | ||||
| auto nhwc_shape = kernel->in_tensors()[0]->shape(); | auto nhwc_shape = kernel->in_tensors()[0]->shape(); | ||||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | ||||
| auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR); | |||||
| auto tensor = | |||||
| new (std::nothrow) Tensor(kernel->in_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR); | |||||
| if (tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "New nchw tensor failed when inserting pre nhwc2nchw kernel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| std::vector<Tensor *> pre_trans_out_tensors = {tensor}; | std::vector<Tensor *> pre_trans_out_tensors = {tensor}; | ||||
| all_tensors->push_back(pre_trans_out_tensors[0]); | |||||
| all_tensors_->push_back(pre_trans_out_tensors[0]); | |||||
| // Create pre transform kernel: Nhwc2Nchw | // Create pre transform kernel: Nhwc2Nchw | ||||
| auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++); | auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++); | ||||
| auto *trans_kernel = | auto *trans_kernel = | ||||
| NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name); | |||||
| NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context_, name); | |||||
| trans_kernels->push_back(trans_kernel); | trans_kernels->push_back(trans_kernel); | ||||
| insert_primitive_.push_back(trans_kernel->GetPrimitive()); | insert_primitive_.push_back(trans_kernel->GetPrimitive()); | ||||
| // Set in_kernels, out_kernels, in_tensors,out_tensors for transform kernel | |||||
| std::vector<kernel::LiteKernel *> pre_trans_in_kernel; | |||||
| if (is_input_kernel) { | |||||
| pre_trans_in_kernel = {}; | |||||
| } else { | |||||
| pre_trans_in_kernel = {pre_kernel}; | |||||
| // Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel | |||||
| std::vector<kernel::LiteKernel *> pre_trans_in_kernels; | |||||
| if (!is_input_kernel) { | |||||
| pre_trans_in_kernels = {pre_kernel}; | |||||
| } | } | ||||
| NPUPassUtils::UpdateKernel(trans_kernel, pre_trans_in_kernel, {kernel}, {kernel->in_tensors()[0]}, | |||||
| NPUPassUtils::UpdateKernel(trans_kernel, pre_trans_in_kernels, {kernel}, {kernel->in_tensors()[0]}, | |||||
| pre_trans_out_tensors); | pre_trans_out_tensors); | ||||
| if (pre_kernel != nullptr) { | if (pre_kernel != nullptr) { | ||||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(pre_kernel, trans_kernel, kernel); | NPUPassUtils::UpdateNH2NCTransNodePreKernel(pre_kernel, trans_kernel, kernel); | ||||
| } | } | ||||
| NPUPassUtils::UpdateNH2NCTransNodeAfterKernel(kernel, trans_kernel, pre_kernel); | |||||
| NPUPassUtils::UpdateNH2NCTransNodePostKernel(trans_kernel, kernel); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int NPUTransformPass::InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels, | |||||
| std::vector<Tensor *> *all_tensors) { | |||||
| // Model output does not insert operator | |||||
| if (kernel->out_kernels().empty()) { | |||||
| return RET_OK; | |||||
| } | |||||
| // Single output multiple references | |||||
| int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels) { | |||||
| bool is_output_kernel = kernel->out_kernels().empty(); | |||||
| // Collect the post kernels that need a trans kernel inserted. | |||||
| // A post kernel needs no trans kernel only if it is an NPU kernel and its type is in npu_trans_nodes. | |||||
| std::vector<kernel::LiteKernel *> post_insert_kernels; | |||||
| for (int i = 0; i < kernel->out_kernels().size(); i++) { | for (int i = 0; i < kernel->out_kernels().size(); i++) { | ||||
| auto post_kernel = kernel->out_kernels().at(i); | |||||
| if (post_kernel->desc().arch == kNPU && npu_trans_nodes.find(post_kernel->Type()) != npu_trans_nodes.end()) { | |||||
| continue; | |||||
| auto post_kernel = kernel->out_kernels()[i]; | |||||
| if (post_kernel->desc().arch != kNPU || npu_trans_nodes.find(post_kernel->Type()) == npu_trans_nodes.end()) { | |||||
| post_insert_kernels.push_back(post_kernel); | |||||
| } | } | ||||
| // Create post transform kernel's out tensor. | |||||
| auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), kernel->out_tensors()[0]->shape(), | |||||
| schema::Format_NHWC, Tensor::VAR); | |||||
| std::vector<Tensor *> post_trans_out_tensors = {tensor}; | |||||
| all_tensors->push_back(post_trans_out_tensors[0]); | |||||
| } | |||||
| if (is_output_kernel || !post_insert_kernels.empty()) { | |||||
| // Create post transform kernel's in tensor. | |||||
| auto nhwc_shape = kernel->out_tensors()[0]->shape(); | |||||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||||
| auto tensor = | |||||
| new (std::nothrow) Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||||
| if (tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "New nchw tensor failed when inserting post nchw2nhwc kernel."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| std::vector<Tensor *> post_trans_in_tensors = {tensor}; | |||||
| all_tensors_->push_back(tensor); | |||||
| auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++); | |||||
| tensor->set_tensor_name(name + "/input0"); | |||||
| // Create post transform kernel: Nchw2Nhwc | // Create post transform kernel: Nchw2Nhwc | ||||
| auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++); | |||||
| auto *post_trans_kernel = | auto *post_trans_kernel = | ||||
| NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors, context, name); | |||||
| NPUPassUtils::CreateNchw2NhwcKernel(post_trans_in_tensors, kernel->out_tensors(), context_, name); | |||||
| // Set in_kernels, out_kernels, in_tensors,out_tensors for transform kernel | |||||
| NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {post_kernel}, kernel->out_tensors(), | |||||
| post_trans_out_tensors); | |||||
| // Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel | |||||
| NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, post_insert_kernels, post_trans_in_tensors, | |||||
| kernel->out_tensors()); | |||||
| insert_primitive_.push_back(post_trans_kernel->GetPrimitive()); | insert_primitive_.push_back(post_trans_kernel->GetPrimitive()); | ||||
| trans_kernels->push_back(post_trans_kernel); | trans_kernels->push_back(post_trans_kernel); | ||||
| NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, post_kernel); | |||||
| NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, post_kernel); | |||||
| if (!is_output_kernel) { | |||||
| for (int i = 0; i < kernel->out_kernels().size(); i++) { | |||||
| auto post_kernel = kernel->out_kernels()[i]; | |||||
| if (find(post_insert_kernels.begin(), post_insert_kernels.end(), post_kernel) != post_insert_kernels.end()) { | |||||
| NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, post_trans_kernel, post_kernel); | |||||
| } else { | |||||
| NPUPassUtils::UpdateNC2NHPostKernelInTensors(kernel, post_trans_kernel, post_kernel); | |||||
| } | |||||
| } | |||||
| } | |||||
| NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, post_insert_kernels); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -108,13 +124,25 @@ int NPUTransformPass::Run() { | |||||
| i++; | i++; | ||||
| continue; | continue; | ||||
| } | } | ||||
| // Insert pre_kernels before kernel in the vector, then advance the loop index by (pre_kernels.size() + 1) | |||||
| // so that it points at the insert location of post_kernels. | |||||
| std::vector<kernel::LiteKernel *> pre_kernels; | std::vector<kernel::LiteKernel *> pre_kernels; | ||||
| InsertPreNode(context_, kernel, &pre_kernels, all_tensors_); | |||||
| auto ret = InsertPreNodes(kernel, &pre_kernels); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Insert nhwc2nchw kernel before kernel " << kernel->name() << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end()); | all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end()); | ||||
| i += (pre_kernels.size() + 1); | i += (pre_kernels.size() + 1); | ||||
| // insert post_kernels after kernel in vector | |||||
| // modify loop index add post_kernels.size() to the next kernel in the origin vector | |||||
| std::vector<kernel::LiteKernel *> post_kernels; | std::vector<kernel::LiteKernel *> post_kernels; | ||||
| InsertPostNode(context_, kernel, &post_kernels, all_tensors_); | |||||
| ret = InsertPostNodes(kernel, &post_kernels); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Insert nchw2nhwc kernel after kernel " << kernel->name() << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| all_kernels_->insert(all_kernels_->begin() + i, post_kernels.begin(), post_kernels.end()); | all_kernels_->insert(all_kernels_->begin() + i, post_kernels.begin(), post_kernels.end()); | ||||
| i += post_kernels.size(); | i += post_kernels.size(); | ||||
| } | } | ||||
| @@ -42,11 +42,9 @@ class NPUTransformPass : public NPUBasePass { | |||||
| } | } | ||||
| private: | private: | ||||
| int InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors); | |||||
| int InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels); | |||||
| int InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||||
| std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors); | |||||
| int InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels); | |||||
| private: | private: | ||||
| int total = 0; | int total = 0; | ||||
| @@ -21,6 +21,9 @@ | |||||
| using mindspore::kernel::KERNEL_ARCH::kNPU; | using mindspore::kernel::KERNEL_ARCH::kNPU; | ||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| using mindspore::schema::ActivationType_NO_ACTIVATION; | |||||
| using mindspore::schema::ActivationType_RELU; | |||||
| using mindspore::schema::ActivationType_RELU6; | |||||
| using mindspore::schema::PrimitiveType_Add; | using mindspore::schema::PrimitiveType_Add; | ||||
| using mindspore::schema::PrimitiveType_Div; | using mindspore::schema::PrimitiveType_Div; | ||||
| using mindspore::schema::PrimitiveType_Equal; | using mindspore::schema::PrimitiveType_Equal; | ||||
| @@ -118,7 +121,6 @@ int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||||
| case PrimitiveType_GreaterEqual: | case PrimitiveType_GreaterEqual: | ||||
| op = CreateOperator<hiai::op::GreaterEqual>(npu_inputs, name_); | op = CreateOperator<hiai::op::GreaterEqual>(npu_inputs, name_); | ||||
| break; | break; | ||||
| default: | default: | ||||
| MS_LOG(ERROR) << "Unsupported primitive type:" | MS_LOG(ERROR) << "Unsupported primitive type:" | ||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive_->Type())); | << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive_->Type())); | ||||
| @@ -129,16 +131,42 @@ int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| op_ = op; | op_ = op; | ||||
| if (activation_type_ != ActivationType_NO_ACTIVATION) { | |||||
| act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act"); | |||||
| if (act_ == nullptr) { | |||||
| MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| act_->set_input_x(*op_); | |||||
| if (activation_type_ == ActivationType_RELU) { | |||||
| act_->set_attr_mode(1); | |||||
| } else if (activation_type_ == ActivationType_RELU6) { | |||||
| act_->set_attr_mode(14); | |||||
| } else { | |||||
| MS_LOG(ERROR) << "Unsupport activation type for op " << name_; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| ge::Operator *mindspore::kernel::ArithmeticNPUKernel::GetNPUOp() { return this->op_; } | |||||
| ge::Operator *mindspore::kernel::ArithmeticNPUKernel::GetNPUOp() { | |||||
| if (activation_type_ == ActivationType_NO_ACTIVATION) { | |||||
| return op_; | |||||
| } | |||||
| return act_; | |||||
| } | |||||
| ArithmeticNPUKernel::~ArithmeticNPUKernel() { | ArithmeticNPUKernel::~ArithmeticNPUKernel() { | ||||
| if (op_ != nullptr) { | if (op_ != nullptr) { | ||||
| delete op_; | delete op_; | ||||
| op_ = nullptr; | op_ = nullptr; | ||||
| } | } | ||||
| if (act_ != nullptr) { | |||||
| delete act_; | |||||
| act_ = nullptr; | |||||
| } | |||||
| } | } | ||||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Mul, NPUKernelCreator<ArithmeticNPUKernel>) | REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Mul, NPUKernelCreator<ArithmeticNPUKernel>) | ||||
| @@ -17,15 +17,18 @@ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | ||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | ||||
| #include <vector> | #include <vector> | ||||
| #include "nnacl/arithmetic.h" | |||||
| #include "src/runtime/kernel/npu/npu_kernel.h" | #include "src/runtime/kernel/npu/npu_kernel.h" | ||||
| #include "include/graph/op/math_defs.h" | |||||
| #include "include/graph/op/all_ops.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class ArithmeticNPUKernel : public NPUKernel { | class ArithmeticNPUKernel : public NPUKernel { | ||||
| public: | public: | ||||
| ArithmeticNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ArithmeticNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | ||||
| const mindspore::lite::PrimitiveC *primitive) | const mindspore::lite::PrimitiveC *primitive) | ||||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| activation_type_ = reinterpret_cast<ArithmeticParameter *>(parameter)->activation_type_; | |||||
| } | |||||
| ~ArithmeticNPUKernel() override; | ~ArithmeticNPUKernel() override; | ||||
| int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | ||||
| @@ -36,7 +39,9 @@ class ArithmeticNPUKernel : public NPUKernel { | |||||
| ge::Operator *GetNPUOp() override; | ge::Operator *GetNPUOp() override; | ||||
| private: | private: | ||||
| int activation_type_; | |||||
| ge::Operator *op_ = nullptr; | ge::Operator *op_ = nullptr; | ||||
| hiai::op::Activation *act_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | ||||
| @@ -0,0 +1,57 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "src/runtime/kernel/npu/squeeze_npu.h" | |||||
| #include "src/kernel_registry.h" | |||||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | |||||
| using mindspore::kernel::KERNEL_ARCH::kNPU; | |||||
| using mindspore::lite::KernelRegistrar; | |||||
| using mindspore::schema::PrimitiveType_Squeeze; | |||||
| namespace mindspore::kernel { | |||||
| int SqueezeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||||
| OpParameter *opParameter) { | |||||
| return RET_OK; | |||||
| } | |||||
| int SqueezeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||||
| const std::vector<lite::Tensor *> &outputs, | |||||
| const std::vector<ge::Operator *> &npu_inputs) { | |||||
| op_ = new (std::nothrow) hiai::op::Squeeze(name_); | |||||
| if (op_ == nullptr) { | |||||
| MS_LOG(ERROR) << "New squeeze npu operator for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| std::vector<int64_t> axes; | |||||
| for (int i = 0; i < axes_.size(); i++) { | |||||
| axes.push_back(axes_[i]); | |||||
| } | |||||
| op_->set_input_x(*npu_inputs[0]); | |||||
| op_->set_attr_axis(axes); | |||||
| return RET_OK; | |||||
| } | |||||
| ge::Operator *mindspore::kernel::SqueezeNPUKernel::GetNPUOp() { return this->op_; } | |||||
| SqueezeNPUKernel::~SqueezeNPUKernel() { | |||||
| if (op_ != nullptr) { | |||||
| delete op_; | |||||
| op_ = nullptr; | |||||
| } | |||||
| } | |||||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Squeeze, NPUKernelCreator<SqueezeNPUKernel>) | |||||
| } // namespace mindspore::kernel | |||||
| @@ -0,0 +1,46 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SQUEEZE_NPU_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SQUEEZE_NPU_H_ | |||||
| #include <vector> | |||||
| #include "src/ops/squeeze.h" | |||||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||||
| #include "include/graph/op/all_ops.h" | |||||
| namespace mindspore::kernel { | |||||
| class SqueezeNPUKernel : public NPUKernel { | |||||
| public: | |||||
| SqueezeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||||
| const mindspore::lite::PrimitiveC *primitive) | |||||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| auto squeeze = reinterpret_cast<const mindspore::lite::Squeeze *>(primitive); | |||||
| axes_ = squeeze->GetAxis(); | |||||
| } | |||||
| ~SqueezeNPUKernel() override; | |||||
| int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||||
| OpParameter *opParameter) override; | |||||
| int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||||
| const std::vector<ge::Operator *> &npu_inputs) override; | |||||
| ge::Operator *GetNPUOp() override; | |||||
| private: | |||||
| hiai::op::Squeeze *op_ = nullptr; | |||||
| vector<int> axes_; | |||||
| }; | |||||
| } // namespace mindspore::kernel | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SQUEEZE_NPU_H_ | |||||
| @@ -16,6 +16,7 @@ mobilenet_v1_1.0_192.tflite 6 | |||||
| mobilenet_v1_1.0_224.tflite 2.5 | mobilenet_v1_1.0_224.tflite 2.5 | ||||
| mobilenet_v2_1.0_224.tflite 2.5 | mobilenet_v2_1.0_224.tflite 2.5 | ||||
| squeezenet.tflite 2.5 | squeezenet.tflite 2.5 | ||||
| inception_resnet_v2.tflite 2 | |||||
| inception_v3.tflite 1 | inception_v3.tflite 1 | ||||
| inception_v4.tflite 0.5 | inception_v4.tflite 0.5 | ||||
| efficientnet_lite0_fp32_2.tflite 1 | efficientnet_lite0_fp32_2.tflite 1 | ||||
| @@ -23,6 +24,7 @@ efficientnet_lite1_fp32_2.tflite 1 | |||||
| efficientnet_lite2_fp32_2.tflite 1 | efficientnet_lite2_fp32_2.tflite 1 | ||||
| efficientnet_lite3_fp32_2.tflite 1 | efficientnet_lite3_fp32_2.tflite 1 | ||||
| efficientnet_lite4_fp32_2.tflite 1 | efficientnet_lite4_fp32_2.tflite 1 | ||||
| deeplabv3_1_default_1.tflite 2.5 | |||||
| 6c_seg_nomean_20200610 1.5 | 6c_seg_nomean_20200610 1.5 | ||||
| ml_video_edit_person_divison 0.5 | ml_video_edit_person_divison 0.5 | ||||
| porseg_tmp.onnx 1 2 | porseg_tmp.onnx 1 2 | ||||