| @@ -1,126 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kNPU; | |||
| int NPUAddTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *after_kernel) { | |||
| std::vector<kernel::LiteKernel *> out_kernels; | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (out_kernel == after_kernel) { | |||
| out_kernels.push_back(trans_kernel); | |||
| } else { | |||
| out_kernels.push_back(out_kernel); | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors()); | |||
| return RET_OK; | |||
| } | |||
| int NPUAddTransformPass::InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| auto kernel = *it; | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) { | |||
| continue; | |||
| } | |||
| std::vector<int> nh2nc_shape = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[3], | |||
| kernel->out_tensors()[0]->shape()[1], kernel->out_tensors()[0]->shape()[2]}; | |||
| auto nh2nc_tensor = | |||
| new Tensor(kernel->out_tensors()[0]->data_type(), nh2nc_shape, schema::Format_NHWC, Tensor::VAR); | |||
| std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor}; | |||
| all_tensors->push_back(nh2nc_tensors[0]); | |||
| auto nc2nh_shape = {nh2nc_shape[0], nh2nc_shape[2], nh2nc_shape[3], nh2nc_shape[1]}; | |||
| auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nc2nh_shape, schema::Format_NCHW, Tensor::VAR); | |||
| std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor}; | |||
| all_tensors->push_back(nc2nh_tensors[0]); | |||
| auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++); | |||
| auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); | |||
| all_kernels->push_back(nh2nc_kernel); | |||
| insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); | |||
| auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++); | |||
| auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); | |||
| all_kernels->push_back(nc2nh_kernel); | |||
| insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); | |||
| NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors); | |||
| NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors); | |||
| UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel); | |||
| UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, out_kernel); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUAddTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel) { | |||
| std::vector<Tensor *> next_in_tensors; | |||
| for (auto next_in_tensor : next_kernel->in_tensors()) { | |||
| if (next_in_tensor != kernel->out_tensors()[0]) { | |||
| next_in_tensors.push_back(next_in_tensor); | |||
| } else { | |||
| next_in_tensors.push_back(trans_kernel->out_tensors()[0]); | |||
| } | |||
| } | |||
| next_kernel->set_in_tensors(next_in_tensors); | |||
| std::vector<kernel::LiteKernel *> next_in_kernels; | |||
| for (auto in_kernel : next_kernel->in_kernels()) { | |||
| if (in_kernel == kernel) { | |||
| next_in_kernels.push_back(trans_kernel); | |||
| } else { | |||
| next_in_kernels.push_back(in_kernel); | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors, | |||
| next_kernel->out_tensors()); | |||
| return RET_OK; | |||
| } | |||
| int NPUAddTransformPass::Run() { | |||
| if (context_->IsNpuEnabled()) { | |||
| std::vector<kernel::LiteKernel *> new_kernels; | |||
| for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) { | |||
| auto kernel = *it; | |||
| new_kernels.push_back(kernel); | |||
| if (kernel->desc().arch != kNPU) { | |||
| continue; | |||
| } | |||
| if (kernel->Type() == schema::PrimitiveType_Add && kernel->out_kernels().size() >= 2) { | |||
| int sum = 0; | |||
| for (auto i : kernel->out_kernels()) { | |||
| if (i->Type() == schema::PrimitiveType_Nhwc2Nchw) { | |||
| sum++; | |||
| } | |||
| } | |||
| if (kernel->out_kernels().size() != sum) { | |||
| InsertNode(context_, it, &new_kernels, all_tensors_); | |||
| } | |||
| } | |||
| } | |||
| all_kernels_->clear(); | |||
| for (int i = 0; i < new_kernels.size(); i++) { | |||
| all_kernels_->push_back(new_kernels[i]); | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -1,59 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_base_pass.h" | |||
| namespace mindspore::lite { | |||
| class NPUAddTransformPass : public NPUBasePass { | |||
| public: | |||
| explicit NPUAddTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| context_ = context; | |||
| all_kernels_ = all_kernels; | |||
| all_tensors_ = all_tensors; | |||
| name_ = "NPUConcatTransformPass"; | |||
| } | |||
| ~NPUAddTransformPass() override { | |||
| for (auto primitive : insert_primitive_) { | |||
| delete primitive; | |||
| } | |||
| insert_primitive_.clear(); | |||
| } | |||
| int Run() override; | |||
| private: | |||
| int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *after_kernel); | |||
| int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel); | |||
| int InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors); | |||
| private: | |||
| int total = 0; | |||
| const InnerContext *context_; | |||
| std::vector<kernel::LiteKernel *> *all_kernels_; | |||
| std::vector<const PrimitiveC *> insert_primitive_; | |||
| std::vector<Tensor *> *all_tensors_; | |||
| }; | |||
| } // namespace mindspore::lite | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_ | |||
| @@ -1,126 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kNPU; | |||
| int NPUConcatTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *after_kernel) { | |||
| std::vector<kernel::LiteKernel *> out_kernels; | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (out_kernel == after_kernel) { | |||
| out_kernels.push_back(trans_kernel); | |||
| } else { | |||
| out_kernels.push_back(out_kernel); | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors()); | |||
| return RET_OK; | |||
| } | |||
| int NPUConcatTransformPass::InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| for (auto kernel : (*it)->in_kernels()) { | |||
| if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) { | |||
| continue; | |||
| } | |||
| auto out_kernel = (*it); | |||
| std::vector<int> nh2nc_shape = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[3], | |||
| kernel->out_tensors()[0]->shape()[1], kernel->out_tensors()[0]->shape()[2]}; | |||
| auto nh2nc_tensor = | |||
| new Tensor(kernel->out_tensors()[0]->data_type(), nh2nc_shape, schema::Format_NHWC, Tensor::VAR); | |||
| std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor}; | |||
| all_tensors->push_back(nh2nc_tensors[0]); | |||
| auto nc2nh_shape = {nh2nc_shape[0], nh2nc_shape[2], nh2nc_shape[3], nh2nc_shape[1]}; | |||
| auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nc2nh_shape, schema::Format_NCHW, Tensor::VAR); | |||
| std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor}; | |||
| all_tensors->push_back(nc2nh_tensors[0]); | |||
| auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++); | |||
| auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); | |||
| all_kernels->push_back(nh2nc_kernel); | |||
| insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); | |||
| auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++); | |||
| auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); | |||
| all_kernels->push_back(nc2nh_kernel); | |||
| insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); | |||
| NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors); | |||
| NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors); | |||
| UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel); | |||
| UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, out_kernel); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUConcatTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, | |||
| kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel) { | |||
| std::vector<Tensor *> next_in_tensors; | |||
| for (auto next_in_tensor : next_kernel->in_tensors()) { | |||
| if (next_in_tensor != kernel->out_tensors()[0]) { | |||
| next_in_tensors.push_back(next_in_tensor); | |||
| } else { | |||
| next_in_tensors.push_back(trans_kernel->out_tensors()[0]); | |||
| } | |||
| } | |||
| next_kernel->set_in_tensors(next_in_tensors); | |||
| std::vector<kernel::LiteKernel *> next_in_kernels; | |||
| for (auto in_kernel : next_kernel->in_kernels()) { | |||
| if (in_kernel == kernel) { | |||
| next_in_kernels.push_back(trans_kernel); | |||
| } else { | |||
| next_in_kernels.push_back(in_kernel); | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors, | |||
| next_kernel->out_tensors()); | |||
| return RET_OK; | |||
| } | |||
| int NPUConcatTransformPass::Run() { | |||
| if (context_->IsNpuEnabled()) { | |||
| std::vector<kernel::LiteKernel *> new_kernels; | |||
| for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) { | |||
| auto kernel = *it; | |||
| if (kernel->desc().arch != kNPU) { | |||
| new_kernels.push_back(kernel); | |||
| continue; | |||
| } | |||
| if (kernel->Type() == schema::PrimitiveType_Concat && kernel->in_kernels().size() >= 2) { | |||
| int sum = 0; | |||
| for (auto i : kernel->in_kernels()) { | |||
| if (i->Type() == schema::PrimitiveType_Nchw2Nhwc) { | |||
| sum++; | |||
| } | |||
| } | |||
| if (kernel->out_kernels().size() != sum) { | |||
| InsertNode(context_, it, &new_kernels, all_tensors_); | |||
| } | |||
| } | |||
| new_kernels.push_back(kernel); | |||
| } | |||
| all_kernels_->clear(); | |||
| for (int i = 0; i < new_kernels.size(); i++) { | |||
| all_kernels_->push_back(new_kernels[i]); | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -34,6 +34,28 @@ bool CheckFusion(kernel::LiteKernel *kernel) { | |||
| return post_flag; | |||
| } | |||
| bool CheckFormatFusion(kernel::LiteKernel *kernel) { | |||
| if (kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) { | |||
| return std::all_of( | |||
| kernel->out_kernels().begin(), kernel->out_kernels().end(), | |||
| [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc; }); | |||
| } | |||
| if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) { | |||
| return std::all_of( | |||
| kernel->out_kernels().begin(), kernel->out_kernels().end(), | |||
| [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; }); | |||
| } | |||
| return false; | |||
| } | |||
| void NPUFusionPass::RemoveAndFreeKernel(kernel::LiteKernel *cur_kernel) { | |||
| auto itr = find(kernels->begin(), kernels->end(), cur_kernel); | |||
| if (itr != kernels->end()) { | |||
| kernels->erase(itr); | |||
| } | |||
| delete cur_kernel; | |||
| } | |||
| void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) { | |||
| for (auto in_kernel : cur_kernel->in_kernels()) { | |||
| auto pre_kernel = in_kernel->in_kernels()[0]; | |||
| @@ -55,6 +77,7 @@ void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) { | |||
| } | |||
| } | |||
| cur_kernel->set_in_kernels(cur_in_kernels); | |||
| RemoveAndFreeKernel(in_kernel); | |||
| } | |||
| } | |||
| @@ -79,6 +102,7 @@ void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) { | |||
| } | |||
| } | |||
| cur_kernel->set_out_kernels(cur_out_kernels); | |||
| RemoveAndFreeKernel(out_kernel); | |||
| } | |||
| } | |||
| @@ -163,34 +187,52 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) { | |||
| if (kernel->out_kernels().empty()) { | |||
| return RET_OK; | |||
| } | |||
| if (!std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) { | |||
| return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; | |||
| })) { | |||
| if (!CheckFormatFusion(kernel)) { | |||
| return RET_OK; | |||
| } | |||
| auto pre_kernel = kernel->in_kernels()[0]; | |||
| auto pre_out_kernels = pre_kernel->out_kernels(); | |||
| for (size_t i = 0; i < pre_out_kernels.size(); i++) { | |||
| if (pre_out_kernels[i] == kernel) { | |||
| pre_out_kernels.erase(pre_out_kernels.begin() + i); | |||
| break; | |||
| } | |||
| auto pre_kernel = kernel->in_kernels()[0]; | |||
| auto in_tensor = kernel->in_tensors()[0]; | |||
| auto out_tensor = kernel->out_tensors()[0]; | |||
| auto tensor_itr = std::find(pre_kernel->out_tensors().begin(), pre_kernel->out_tensors().end(), in_tensor); | |||
| if (tensor_itr != pre_kernel->out_tensors().end()) { | |||
| in_tensor = *tensor_itr; | |||
| } else { | |||
| MS_LOG(ERROR) << "Can't find the connneted tensor between kernel " << kernel->name() << " and it's pre_kernel."; | |||
| return RET_ERROR; | |||
| } | |||
| for (const auto &nc2nh : kernel->out_kernels()) { | |||
| for (const auto &post_kernel : nc2nh->out_kernels()) { | |||
| std::vector<kernel::LiteKernel *> pre_insert_kernels; | |||
| for (const auto &trans_kernel : kernel->out_kernels()) { | |||
| for (const auto &post_kernel : trans_kernel->out_kernels()) { | |||
| // update tensor | |||
| auto tensors_vec = post_kernel->in_tensors(); | |||
| for (size_t i = 0; i < tensors_vec.size(); i++) { | |||
| if (tensors_vec[i] == out_tensor) { | |||
| tensors_vec[i] = in_tensor; | |||
| break; | |||
| } | |||
| } | |||
| post_kernel->set_in_tensors(tensors_vec); | |||
| // update kernel | |||
| auto post_in_kernels = post_kernel->in_kernels(); | |||
| for (size_t i = 0; i < post_in_kernels.size(); i++) { | |||
| if (post_in_kernels[i] == nc2nh) { | |||
| if (post_in_kernels[i] == trans_kernel) { | |||
| post_in_kernels[i] = pre_kernel; | |||
| break; | |||
| } | |||
| } | |||
| post_kernel->set_in_kernels(post_in_kernels); | |||
| pre_out_kernels.push_back(post_kernel); | |||
| pre_insert_kernels.push_back(post_kernel); | |||
| RemoveAndFreeKernel(trans_kernel); | |||
| } | |||
| } | |||
| pre_kernel->set_out_kernels(pre_out_kernels); | |||
| auto pre_out_kernels = pre_kernel->out_kernels(); | |||
| auto itr = find(pre_out_kernels.begin(), pre_out_kernels.end(), kernel); | |||
| pre_out_kernels.insert(itr, pre_insert_kernels.begin(), pre_insert_kernels.end()); | |||
| pre_kernel->set_in_kernels(pre_out_kernels); | |||
| RemoveAndFreeKernel(kernel); | |||
| return RET_OK; | |||
| } | |||
| @@ -201,6 +243,7 @@ int NPUFusionPass::Run() { | |||
| ConcatFusion(kernel); | |||
| continue; | |||
| case schema::PrimitiveType_Add: | |||
| case schema::PrimitiveType_Activation: | |||
| AddFusion(kernel); | |||
| continue; | |||
| case schema::PrimitiveType_Nchw2Nhwc: | |||
| @@ -33,11 +33,12 @@ class NPUFusionPass : public NPUBasePass { | |||
| int Run() override; | |||
| protected: | |||
| void RemoveAndFreeKernel(kernel::LiteKernel *cur_kernel); | |||
| void UpdatePreKernels(kernel::LiteKernel *kernel); | |||
| void UpdatePostKernels(kernel::LiteKernel *kernel); | |||
| int ConcatFusion(kernel::LiteKernel *kernel); | |||
| int AddFusion(kernel::LiteKernel *kernel); | |||
| int FormatFusion(kernel::LiteKernel *kernel); | |||
| void UpdatePreKernels(kernel::LiteKernel *kernel); | |||
| void UpdatePostKernels(kernel::LiteKernel *kernel); | |||
| private: | |||
| std::vector<kernel::LiteKernel *> *kernels; | |||
| @@ -0,0 +1,139 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h" | |||
| #include <set> | |||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kNPU; | |||
| enum InsertState { InsertNone, PreInsert, PostInsert }; | |||
| std::set<mindspore::schema::PrimitiveType> npu_insert_nodes = {schema::PrimitiveType_Concat, schema::PrimitiveType_Add}; | |||
| int GetInsertState(kernel::LiteKernel *kernel) { | |||
| if (npu_insert_nodes.find(kernel->Type()) == npu_insert_nodes.end()) { | |||
| return InsertNone; | |||
| } | |||
| auto pre_flag = | |||
| std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), | |||
| [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc; }); | |||
| auto post_flag = | |||
| std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), | |||
| [](const kernel::LiteKernel *kernel) { return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw; }); | |||
| if (pre_flag && !post_flag) { | |||
| return PostInsert; | |||
| } | |||
| if (!pre_flag && post_flag) { | |||
| return PreInsert; | |||
| } | |||
| return InsertNone; | |||
| } | |||
| int NPUInsertTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKernel *cur_kernel, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| for (auto kernel : cur_kernel->in_kernels()) { | |||
| if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) { | |||
| continue; | |||
| } | |||
| auto nhwc_shape = cur_kernel->out_tensors()[0]->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| auto nh2nc_tensor = new Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||
| std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor}; | |||
| all_tensors->push_back(nh2nc_tensors[0]); | |||
| auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); | |||
| std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor}; | |||
| all_tensors->push_back(nc2nh_tensors[0]); | |||
| auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++); | |||
| auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); | |||
| all_kernels->push_back(nh2nc_kernel); | |||
| insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); | |||
| auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++); | |||
| auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); | |||
| all_kernels->push_back(nc2nh_kernel); | |||
| insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); | |||
| NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors); | |||
| NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {cur_kernel}, nh2nc_tensors, nc2nh_tensors); | |||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, cur_kernel); | |||
| NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, cur_kernel); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUInsertTransformPass::InsertPostNode(const InnerContext *context, kernel::LiteKernel *cur_kernel, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| for (auto out_kernel : cur_kernel->out_kernels()) { | |||
| if (out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) { | |||
| continue; | |||
| } | |||
| auto nhwc_shape = cur_kernel->out_tensors()[0]->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| auto nh2nc_tensor = | |||
| new Tensor(cur_kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||
| std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor}; | |||
| all_tensors->push_back(nh2nc_tensors[0]); | |||
| auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); | |||
| std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor}; | |||
| all_tensors->push_back(nc2nh_tensors[0]); | |||
| auto nh2nc_name = cur_kernel->name() + "_nh2nc_" + std::to_string(total++); | |||
| auto *nh2nc_kernel = | |||
| NPUPassUtils::CreateNhwc2NchwKernel(cur_kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name); | |||
| all_kernels->push_back(nh2nc_kernel); | |||
| insert_primitive_.push_back(nh2nc_kernel->GetPrimitive()); | |||
| auto nc2nh_name = cur_kernel->name() + "_nc2nh_" + std::to_string(total++); | |||
| auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name); | |||
| all_kernels->push_back(nc2nh_kernel); | |||
| insert_primitive_.push_back(nc2nh_kernel->GetPrimitive()); | |||
| NPUPassUtils::UpdateKernel(nh2nc_kernel, {cur_kernel}, {nc2nh_kernel}, cur_kernel->out_tensors(), nh2nc_tensors); | |||
| NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors); | |||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(cur_kernel, nh2nc_kernel, out_kernel); | |||
| NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(cur_kernel, nc2nh_kernel, out_kernel); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUInsertTransformPass::Run() { | |||
| if (!context_->IsNpuEnabled()) { | |||
| return RET_OK; | |||
| } | |||
| for (size_t i = 0; i < all_kernels_->size(); i++) { | |||
| auto kernel = (*all_kernels_)[i]; | |||
| if (kernel->desc().arch != kNPU) { | |||
| continue; | |||
| } | |||
| auto insert_state = GetInsertState(kernel); | |||
| if (insert_state == PreInsert) { | |||
| std::vector<kernel::LiteKernel *> pre_kernels; | |||
| InsertPreNode(context_, kernel, &pre_kernels, all_tensors_); | |||
| all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end()); | |||
| i += pre_kernels.size(); | |||
| } | |||
| if (insert_state == PostInsert) { | |||
| std::vector<kernel::LiteKernel *> post_kernels; | |||
| InsertPostNode(context_, kernel, &post_kernels, all_tensors_); | |||
| all_kernels_->insert(all_kernels_->begin() + i + 1, post_kernels.begin(), post_kernels.end()); | |||
| i += post_kernels.size(); | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -14,23 +14,25 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_INSERT_TRANSFORM_PASS_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_INSERT_TRANSFORM_PASS_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_base_pass.h" | |||
| namespace mindspore::lite { | |||
| class NPUConcatTransformPass : public NPUBasePass { | |||
| class NPUInsertTransformPass : public NPUBasePass { | |||
| public: | |||
| explicit NPUConcatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels, | |||
| explicit NPUInsertTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| context_ = context; | |||
| all_kernels_ = all_kernels; | |||
| all_tensors_ = all_tensors; | |||
| name_ = "NPUConcatTransformPass"; | |||
| name_ = "NPUInsertTransformPass"; | |||
| } | |||
| ~NPUConcatTransformPass() override { | |||
| ~NPUInsertTransformPass() override { | |||
| for (auto primitive : insert_primitive_) { | |||
| delete primitive; | |||
| } | |||
| @@ -39,14 +41,11 @@ class NPUConcatTransformPass : public NPUBasePass { | |||
| int Run() override; | |||
| private: | |||
| int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *after_kernel); | |||
| int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel); | |||
| int InsertPreNode(const InnerContext *context, kernel::LiteKernel *cur_kernel, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors); | |||
| int InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors); | |||
| int InsertPostNode(const InnerContext *context, kernel::LiteKernel *cur_kernel, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors); | |||
| private: | |||
| int total = 0; | |||
| @@ -56,4 +55,4 @@ class NPUConcatTransformPass : public NPUBasePass { | |||
| std::vector<const PrimitiveC *> insert_primitive_; | |||
| }; | |||
| } // namespace mindspore::lite | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_ | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_INSERT_TRANSFORM_PASS_H_ | |||
| @@ -99,4 +99,76 @@ void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<ke | |||
| kernel->set_in_kernels(in_kernels); | |||
| kernel->set_out_kernels(out_kernels); | |||
| } | |||
| void NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *after_kernel) { | |||
| std::vector<kernel::LiteKernel *> out_kernels; | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (out_kernel == after_kernel) { | |||
| out_kernels.push_back(trans_kernel); | |||
| } else { | |||
| out_kernels.push_back(out_kernel); | |||
| } | |||
| } | |||
| UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors()); | |||
| } | |||
| void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel) { | |||
| std::vector<kernel::LiteKernel *> cur_out_kernels; | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (out_kernel == next_kernel) { | |||
| cur_out_kernels.push_back(trans_kernel); | |||
| } else { | |||
| cur_out_kernels.push_back(out_kernel); | |||
| } | |||
| } | |||
| auto kernel_out_tensor = kernel->out_tensors()[0]; | |||
| // Change format the output of the current kernel nhwc->nchw | |||
| auto nhwc_shape = kernel_out_tensor->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| kernel_out_tensor->set_format(schema::Format_NCHW); | |||
| kernel_out_tensor->set_shape(nchw_shape); | |||
| UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor}); | |||
| } | |||
| void NPUPassUtils::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *before_kernel) { | |||
| std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]}; | |||
| for (int i = 1; i < kernel->in_tensors().size(); i++) { | |||
| cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]); | |||
| } | |||
| std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel}; | |||
| for (int i = 0; i < kernel->in_kernels().size(); i++) { | |||
| auto in_kernel = kernel->in_kernels()[i]; | |||
| if (in_kernel != kernel) { | |||
| cur_in_kernels.push_back(in_kernel); | |||
| } | |||
| } | |||
| UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors, kernel->out_tensors()); | |||
| } | |||
| void NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel) { | |||
| std::vector<Tensor *> next_in_tensors; | |||
| for (auto next_in_tensor : next_kernel->in_tensors()) { | |||
| if (next_in_tensor != kernel->out_tensors()[0]) { | |||
| next_in_tensors.push_back(next_in_tensor); | |||
| } else { | |||
| next_in_tensors.push_back(trans_kernel->out_tensors()[0]); | |||
| } | |||
| } | |||
| next_kernel->set_in_tensors(next_in_tensors); | |||
| std::vector<kernel::LiteKernel *> next_in_kernels; | |||
| for (auto in_kernel : next_kernel->in_kernels()) { | |||
| if (in_kernel == kernel) { | |||
| next_in_kernels.push_back(trans_kernel); | |||
| } else { | |||
| next_in_kernels.push_back(in_kernel); | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors, | |||
| next_kernel->out_tensors()); | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -35,6 +35,18 @@ class NPUPassUtils { | |||
| const std::vector<kernel::LiteKernel *> &out_kernels, | |||
| const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors); | |||
| static void UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *after_kernel); | |||
| static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel); | |||
| static void UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *before_kernel); | |||
| static void UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel); | |||
| private: | |||
| static PrimitiveC *CreateNchw2NhwcPrimitive(); | |||
| @@ -21,43 +21,9 @@ | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kCPU; | |||
| using kernel::KERNEL_ARCH::kNPU; | |||
| int NPUTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *after_kernel) { | |||
| std::vector<kernel::LiteKernel *> out_kernels; | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (out_kernel == after_kernel) { | |||
| out_kernels.push_back(trans_kernel); | |||
| } else { | |||
| out_kernels.push_back(out_kernel); | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors()); | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *before_kernel) { | |||
| std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]}; | |||
| for (int i = 1; i < kernel->in_tensors().size(); i++) { | |||
| cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]); | |||
| } | |||
| std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel}; | |||
| for (int i = 0; i < kernel->in_kernels().size(); i++) { | |||
| auto in_kernel = kernel->in_kernels()[i]; | |||
| if (in_kernel != kernel) { | |||
| cur_in_kernels.push_back(in_kernel); | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors, | |||
| kernel->out_tensors()); | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||
| int NPUTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| auto kernel = *it; | |||
| bool is_input_kernel = kernel->in_kernels().empty(); | |||
| if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU || | |||
| npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) { | |||
| @@ -66,9 +32,9 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<ker | |||
| before_kernel = kernel->in_kernels()[0]; | |||
| } | |||
| // Create pre transform kernel out tensors. | |||
| std::vector<int> shapes{kernel->in_tensors()[0]->shape()[0], kernel->in_tensors()[0]->shape()[3], | |||
| kernel->in_tensors()[0]->shape()[1], kernel->in_tensors()[0]->shape()[2]}; | |||
| auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), shapes, schema::Format_NCHW, Tensor::VAR); | |||
| auto nhwc_shape = kernel->in_tensors()[0]->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR); | |||
| std::vector<Tensor *> pre_trans_out_tensors = {tensor}; | |||
| all_tensors->push_back(pre_trans_out_tensors[0]); | |||
| // Replace the output tensor of the previous node | |||
| @@ -89,17 +55,16 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<ker | |||
| pre_trans_out_tensors); | |||
| if (before_kernel != nullptr) { | |||
| UpdateNH2NCTransNodePreKernel(before_kernel, pre_trans_kernel, kernel); | |||
| NPUPassUtils::UpdateNH2NCTransNodePreKernel(before_kernel, pre_trans_kernel, kernel); | |||
| } | |||
| UpdateNH2NCTransNodeAfterKernel(kernel, pre_trans_kernel, before_kernel); | |||
| NPUPassUtils::UpdateNH2NCTransNodeAfterKernel(kernel, pre_trans_kernel, before_kernel); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||
| int NPUTransformPass::InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, | |||
| std::vector<Tensor *> *all_tensors) { | |||
| auto kernel = *it; | |||
| // Model output does not insert operator | |||
| if (kernel->out_kernels().empty()) { | |||
| return RET_OK; | |||
| @@ -111,9 +76,8 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<ke | |||
| continue; | |||
| } | |||
| // Change format the output of the current kernel nhwc->nchw | |||
| auto shapes = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[1], | |||
| kernel->out_tensors()[0]->shape()[2], kernel->out_tensors()[0]->shape()[3]}; | |||
| auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), shapes, schema::Format_NHWC, Tensor::VAR); | |||
| auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), kernel->out_tensors()[0]->shape(), | |||
| schema::Format_NHWC, Tensor::VAR); | |||
| std::vector<Tensor *> post_trans_out_tensors = {tensor}; | |||
| all_tensors->push_back(post_trans_out_tensors[0]); | |||
| // Use the output tensor of the current node as the input tensor of the post-conversion operator | |||
| @@ -126,81 +90,32 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<ke | |||
| insert_primitive_.push_back(post_trans_kernel->GetPrimitive()); | |||
| // Directly insert in the back, will not affect the topological sort | |||
| all_kernels->push_back(post_trans_kernel); | |||
| UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel); | |||
| UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel) { | |||
| std::vector<kernel::LiteKernel *> cur_out_kernels; | |||
| for (auto out_kernel : kernel->out_kernels()) { | |||
| if (out_kernel == next_kernel) { | |||
| cur_out_kernels.push_back(trans_kernel); | |||
| } else { | |||
| cur_out_kernels.push_back(out_kernel); | |||
| } | |||
| NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel); | |||
| NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel); | |||
| } | |||
| auto kernel_out_tensor = kernel->out_tensors()[0]; | |||
| // Change format the output of the current kernel nhwc->nchw | |||
| std::vector<int> kernel_out_new_shapes = {kernel_out_tensor->shape()[0], kernel_out_tensor->shape()[3], | |||
| kernel_out_tensor->shape()[1], kernel_out_tensor->shape()[2]}; | |||
| kernel_out_tensor->set_format(schema::Format_NCHW); | |||
| kernel_out_tensor->set_shape(kernel_out_new_shapes); | |||
| NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor}); | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel) { | |||
| std::vector<Tensor *> next_in_tensors; | |||
| for (auto next_in_tensor : next_kernel->in_tensors()) { | |||
| if (next_in_tensor != kernel->out_tensors()[0]) { | |||
| next_in_tensors.push_back(next_in_tensor); | |||
| } else { | |||
| next_in_tensors.push_back(trans_kernel->out_tensors()[0]); | |||
| } | |||
| int NPUTransformPass::Run() { | |||
| if (!context_->IsNpuEnabled()) { | |||
| return RET_OK; | |||
| } | |||
| next_kernel->set_in_tensors(next_in_tensors); | |||
| std::vector<kernel::LiteKernel *> next_in_kernels; | |||
| for (auto in_kernel : next_kernel->in_kernels()) { | |||
| if (in_kernel == kernel) { | |||
| next_in_kernels.push_back(trans_kernel); | |||
| } else { | |||
| next_in_kernels.push_back(in_kernel); | |||
| for (size_t i = 0; i < all_kernels_->size();) { | |||
| auto kernel = (*all_kernels_)[i]; | |||
| if (kernel->desc().arch != kNPU || npu_trans_nodes.find(kernel->Type()) == npu_trans_nodes.end()) { | |||
| i++; | |||
| continue; | |||
| } | |||
| } | |||
| NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors, | |||
| next_kernel->out_tensors()); | |||
| std::vector<kernel::LiteKernel *> pre_kernels; | |||
| InsertPreNode(context_, kernel, &pre_kernels, all_tensors_); | |||
| all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end()); | |||
| i += (pre_kernels.size() + 1); | |||
| return RET_OK; | |||
| } | |||
| int NPUTransformPass::Run() { | |||
| if (context_->IsNpuEnabled()) { | |||
| std::vector<kernel::LiteKernel *> new_kernels; | |||
| for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) { | |||
| auto kernel = *it; | |||
| if (kernel->desc().arch != kNPU) { | |||
| new_kernels.push_back(kernel); | |||
| continue; | |||
| } | |||
| if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) { | |||
| InsertPreNode(context_, it, &new_kernels, all_tensors_); | |||
| new_kernels.push_back(kernel); | |||
| InsertPostNode(context_, it, &new_kernels, all_tensors_); | |||
| } else { | |||
| new_kernels.push_back(kernel); | |||
| } | |||
| } | |||
| all_kernels_->clear(); | |||
| for (int i = 0; i < new_kernels.size(); i++) { | |||
| all_kernels_->push_back(new_kernels[i]); | |||
| } | |||
| std::vector<kernel::LiteKernel *> post_kernels; | |||
| InsertPostNode(context_, kernel, &post_kernels, all_tensors_); | |||
| all_kernels_->insert(all_kernels_->begin() + i, post_kernels.begin(), post_kernels.end()); | |||
| i += post_kernels.size(); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -20,6 +20,7 @@ | |||
| #include "src/lite_kernel.h" | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_base_pass.h" | |||
| namespace mindspore::lite { | |||
| class NPUTransformPass : public NPUBasePass { | |||
| public: | |||
| @@ -32,6 +33,7 @@ class NPUTransformPass : public NPUBasePass { | |||
| all_tensors_ = all_tensors; | |||
| name_ = "NPUTransformPass"; | |||
| } | |||
| ~NPUTransformPass() override { | |||
| for (auto primitive : insert_primitive_) { | |||
| delete primitive; | |||
| @@ -40,22 +42,10 @@ class NPUTransformPass : public NPUBasePass { | |||
| } | |||
| private: | |||
| int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *after_kernel); | |||
| int UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *before_kernel); | |||
| int UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *after_kernel); | |||
| int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *next_kernel); | |||
| int InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||
| int InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors); | |||
| int InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it, | |||
| int InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel, | |||
| std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors); | |||
| private: | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/npu/activation.h" | |||
| #include "src/runtime/kernel/npu/activation_npu.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "src/kernel_registry.h" | |||
| @@ -37,7 +37,7 @@ int ActivationNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, | |||
| int ActivationNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act"); | |||
| act_ = new (std::nothrow) hiai::op::Activation(name_); | |||
| if (act_ == nullptr) { | |||
| MS_LOG(ERROR) << "New activation npu operator for activation op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| @@ -0,0 +1,94 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/npu/batchnorm_npu.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kNPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_FusedBatchNorm; | |||
| namespace mindspore::kernel { | |||
| int BatchnormNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| OpParameter *opParameter) { | |||
| return RET_OK; | |||
| } | |||
| int BatchnormNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| batchnorm_ = new (std::nothrow) ge::op::BatchNormExt2(name_); | |||
| if (batchnorm_ == nullptr) { | |||
| MS_LOG(ERROR) << "New batchnorm npu operator for batchnorm op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| batchnorm_->set_input_x(*npu_inputs[0]); | |||
| auto scale = new (std::nothrow) hiai::op::Const(name_ + "_scale"); | |||
| if (scale == nullptr) { | |||
| MS_LOG(ERROR) << "New scale const failed."; | |||
| return RET_ERROR; | |||
| } | |||
| auto scale_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]); | |||
| scale->set_attr_value(scale_tensor); | |||
| batchnorm_->set_input_scale(*scale); | |||
| auto offset = new (std::nothrow) hiai::op::Const(name_ + "_offset"); | |||
| if (offset == nullptr) { | |||
| MS_LOG(ERROR) << "New offset const failed."; | |||
| return RET_ERROR; | |||
| } | |||
| auto offset_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]); | |||
| offset->set_attr_value(offset_tensor); | |||
| batchnorm_->set_input_offset(*offset); | |||
| auto mean = new (std::nothrow) hiai::op::Const(name_ + "_mean"); | |||
| if (mean == nullptr) { | |||
| MS_LOG(ERROR) << "New mean const failed."; | |||
| return RET_ERROR; | |||
| } | |||
| auto mean_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]); | |||
| mean->set_attr_value(mean_tensor); | |||
| batchnorm_->set_input_mean(*mean); | |||
| auto variance = new (std::nothrow) hiai::op::Const(name_ + "_variance"); | |||
| if (variance == nullptr) { | |||
| MS_LOG(ERROR) << "New variance const failed."; | |||
| return RET_ERROR; | |||
| } | |||
| auto variance_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]); | |||
| variance->set_attr_value(variance_tensor); | |||
| batchnorm_->set_input_variance(*variance); | |||
| batchnorm_->set_attr_epsilon(batchnorm_param_->epsilon_); | |||
| batchnorm_->set_attr_momentum(batchnorm_param_->momentum_); | |||
| batchnorm_->set_attr_mode(1); | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::BatchnormNPUKernel::GetNPUOp() { return batchnorm_; } | |||
| BatchnormNPUKernel::~BatchnormNPUKernel() { | |||
| if (batchnorm_ != nullptr) { | |||
| delete batchnorm_; | |||
| batchnorm_ = nullptr; | |||
| } | |||
| } | |||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_FusedBatchNorm, NPUKernelCreator<BatchnormNPUKernel>) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,47 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_BATCHNORM_NPU_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_BATCHNORM_NPU_H_ | |||
| #include <vector> | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "include/graph/compatible/all_ops.h" | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| namespace mindspore::kernel { | |||
| class BatchnormNPUKernel : public NPUKernel { | |||
| public: | |||
| BatchnormNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter); | |||
| } | |||
| ~BatchnormNPUKernel() override; | |||
| int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| OpParameter *opParameter) override; | |||
| int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) override; | |||
| ge::Operator *GetNPUOp() override; | |||
| private: | |||
| ge::op::BatchNormExt2 *batchnorm_ = nullptr; | |||
| BatchNormParameter *batchnorm_param_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_BATCHNORM_NPU_H_ | |||
| @@ -24,6 +24,10 @@ using mindspore::schema::PrimitiveType_Conv2D; | |||
| namespace mindspore::kernel { | |||
| int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) { | |||
| if (conv_param_->group_ != 1) { | |||
| MS_LOG(WARNING) << "Only support group equals 1 for npu convolution op"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -0,0 +1,105 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/npu/deconvolution_npu.h" | |||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kNPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_DeConv2D; | |||
| namespace mindspore::kernel { | |||
| int DeconvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) { | |||
| if (conv_param_->group_ != 1) { | |||
| MS_LOG(WARNING) << "Only support group equals 1 for npu deconvolution op"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DeconvolutionNPUKernel::SetConvParam() { | |||
| deconv_->set_attr_strides(ge::AttrValue::LIST_INT({conv_param_->stride_h_, conv_param_->stride_w_})); | |||
| deconv_->set_attr_dilations(ge::AttrValue::LIST_INT({conv_param_->dilation_h_, conv_param_->dilation_w_})); | |||
| deconv_->set_attr_groups(conv_param_->group_); | |||
| if (conv_param_->pad_mode_ == Pad_Same) { | |||
| deconv_->set_attr_pad_mode(ge::AttrValue::STR{"SAME"}); | |||
| deconv_->set_attr_pads(ge::AttrValue::LIST_INT({0, 0, 0, 0})); | |||
| } else if (conv_param_->pad_mode_ == Pad_Valid) { | |||
| deconv_->set_attr_pad_mode(ge::AttrValue::STR{"VALID"}); | |||
| deconv_->set_attr_pads(ge::AttrValue::LIST_INT({0, 0, 0, 0})); | |||
| } else { | |||
| deconv_->set_attr_pad_mode(ge::AttrValue::STR{"SPECIFIC"}); | |||
| deconv_->set_attr_pads( | |||
| ge::AttrValue::LIST_INT({conv_param_->pad_u_, conv_param_->pad_d_, conv_param_->pad_l_, conv_param_->pad_r_})); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DeconvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| // set conv attr param | |||
| deconv_ = new (std::nothrow) hiai::op::ConvTranspose(name_ + "_deconv"); | |||
| if (deconv_ == nullptr) { | |||
| MS_LOG(ERROR) << "New deconvolution operator for deconvolution op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| auto ret = SetConvParam(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set npu op parameter for deconvolution op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitWeightBiasConst(inputs); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set weight and bias for deconvolution op " << name_ << " failed when running npu"; | |||
| return RET_ERROR; | |||
| } | |||
| deconv_->set_input_filter(*weight_); | |||
| if (inputs.size() == 3) { | |||
| deconv_->set_input_bias(*bias_); | |||
| } | |||
| deconv_->set_input_x(*npu_inputs[0]); | |||
| if (conv_param_->act_type_ != ActType_No) { | |||
| ret = SetActivation(deconv_, conv_param_->act_type_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::DeconvolutionNPUKernel::GetNPUOp() { | |||
| if (conv_param_->act_type_ == ActType_No) { | |||
| return deconv_; | |||
| } else { | |||
| return act_; | |||
| } | |||
| } | |||
| DeconvolutionNPUKernel::~DeconvolutionNPUKernel() { | |||
| if (deconv_ != nullptr) { | |||
| delete deconv_; | |||
| deconv_ = nullptr; | |||
| } | |||
| } | |||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_DeConv2D, NPUKernelCreator<DeconvolutionNPUKernel>) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,47 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_DECONVOLUTION_NPU_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_DECONVOLUTION_NPU_H_ | |||
| #include <vector> | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "src/runtime/kernel/npu/convolution_base_npu.h" | |||
| #include "nnacl/conv_parameter.h" | |||
| namespace mindspore::kernel { | |||
| class DeconvolutionNPUKernel : public ConvolutionBaseNPUKernel { | |||
| public: | |||
| DeconvolutionNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : ConvolutionBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| conv_param_ = reinterpret_cast<ConvParameter *>(parameter); | |||
| } | |||
| ~DeconvolutionNPUKernel() override; | |||
| int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| OpParameter *opParameter) override; | |||
| int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) override; | |||
| ge::Operator *GetNPUOp() override; | |||
| private: | |||
| int SetConvParam(); | |||
| hiai::op::ConvTranspose *deconv_ = nullptr; | |||
| ConvParameter *conv_param_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_DECONVOLUTION_NPU_H_ | |||
| @@ -37,8 +37,7 @@ | |||
| #include "src/runtime/agent/npu/optimizer/npu_pass_manager.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_transform_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h" | |||
| #endif | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kCPU; | |||
| @@ -570,9 +569,7 @@ int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) { | |||
| #if SUPPORT_NPU | |||
| auto transform_pass = new NPUTransformPass(context_, dst_kernels, src_tensors_); | |||
| mindspore::lite::NPUPassManager::GetInstance()->AddPass(transform_pass); | |||
| auto add_format_pass = new NPUAddTransformPass(context_, dst_kernels, src_tensors_); | |||
| mindspore::lite::NPUPassManager::GetInstance()->AddPass(add_format_pass); | |||
| auto concat_format_pass = new NPUConcatTransformPass(context_, dst_kernels, src_tensors_); | |||
| auto concat_format_pass = new NPUInsertTransformPass(context_, dst_kernels, src_tensors_); | |||
| mindspore::lite::NPUPassManager::GetInstance()->AddPass(concat_format_pass); | |||
| auto fusion_pass = new NPUFusionPass(dst_kernels); | |||
| mindspore::lite::NPUPassManager::GetInstance()->AddPass(fusion_pass); | |||