From 432e81c212e7624fd4204c147ac46eca4e32e22a Mon Sep 17 00:00:00 2001 From: zhaozhenlong Date: Fri, 9 Apr 2021 15:28:41 +0800 Subject: [PATCH] expand dims and tile npu ops npu support select input from multi outputs --- .../runtime/agent/npu/subgraph_npu_kernel.cc | 42 ++++++---- .../src/runtime/kernel/npu/arithmetic_npu.cc | 20 +++++ .../src/runtime/kernel/npu/arithmetic_npu.h | 5 ++ .../src/runtime/kernel/npu/expand_dims_npu.cc | 55 +++++++++++++ .../src/runtime/kernel/npu/expand_dims_npu.h | 42 ++++++++++ .../lite/src/runtime/kernel/npu/npu_kernel.h | 10 +++ .../lite/src/runtime/kernel/npu/tile_npu.cc | 80 +++++++++++++++++++ .../lite/src/runtime/kernel/npu/tile_npu.h | 47 +++++++++++ .../src/runtime/kernel/npu/transpose_npu.cc | 6 +- 9 files changed, 289 insertions(+), 18 deletions(-) create mode 100644 mindspore/lite/src/runtime/kernel/npu/expand_dims_npu.cc create mode 100644 mindspore/lite/src/runtime/kernel/npu/expand_dims_npu.h create mode 100644 mindspore/lite/src/runtime/kernel/npu/tile_npu.cc create mode 100644 mindspore/lite/src/runtime/kernel/npu/tile_npu.h diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc index 801126dc83..a9208b0629 100644 --- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc +++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc @@ -16,6 +16,8 @@ #include "src/runtime/agent/npu/subgraph_npu_kernel.h" #include +#include +#include #include "include/errorcode.h" #include "src/runtime/agent/npu/npu_executor.h" #include "include/graph/operator.h" @@ -34,8 +36,10 @@ using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; static std::set npu_specific_weight_nodes = { - schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_ScaleFusion, - schema::PrimitiveType_BatchNorm, schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm}; + schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, + schema::PrimitiveType_ScaleFusion, schema::PrimitiveType_BatchNorm, + schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm, + schema::PrimitiveType_TileFusion, schema::PrimitiveType_PadFusion}; SubGraphNpuKernel::~SubGraphNpuKernel() { subgraph_input_op_.clear(); @@ -95,7 +99,9 @@ int SubGraphNpuKernel::BuildNPUInputOp() { op_buffer_.clear(); for (auto node : this->nodes_) { std::vector node_input_op; - for (auto in_tensor : node->in_tensors()) { + std::unordered_map> index2_multi_out_index; + for (int i = 0; i < node->in_tensors().size(); ++i) { + auto in_tensor = node->in_tensors()[i]; if (IsSubGraphInputTensor(in_tensor)) { auto tensor_name = node->name() + "_" + std::to_string(count++); hiai::op::Data *data; @@ -109,21 +115,24 @@ int SubGraphNpuKernel::BuildNPUInputOp() { bool is_weight_tensor = true; for (auto in_kernel : node->in_kernels()) { if (IsContain(in_kernel->out_tensors(), in_tensor)) { - if (in_kernel->desc().arch == mindspore::kernel::kNPU) { - // input come from npu - auto npu_op = reinterpret_cast(in_kernel)->GetNPUOp(); - if (npu_op != nullptr) { - node_input_op.push_back(npu_op); - is_weight_tensor = false; - break; - } else { - MS_LOG(ERROR) << in_kernel->type_str() << "NPU Operator is nullptr."; - return RET_ERROR; - } - } else { + if (in_kernel->desc().arch != mindspore::kernel::kNPU) { MS_LOG(ERROR) << "The input of the intermediate node comes from the CPU"; return RET_ERROR; } + // input come from npu + auto npu_op = reinterpret_cast(in_kernel)->GetNPUOp(); + if (npu_op == nullptr) { + MS_LOG(ERROR) << in_kernel->type_str() << "NPU Operator is nullptr."; + return RET_ERROR; + } + node_input_op.push_back(npu_op); + if (in_kernel->out_tensors().size() != 1) { // in_kernel has multi output, we record which output we want. + int out_index = std::find(in_kernel->out_tensors().begin(), in_kernel->out_tensors().end(), in_tensor) - + in_kernel->out_tensors().begin(); + index2_multi_out_index[i] = {npu_op, out_index}; + } + is_weight_tensor = false; + break; } } @@ -144,7 +153,8 @@ int SubGraphNpuKernel::BuildNPUInputOp() { } } // set input to NPU - int ret = reinterpret_cast(node)->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op); + int ret = reinterpret_cast(node)->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op, + index2_multi_out_index); if (ret != RET_OK) { MS_LOG(ERROR) << node->name() << " set npu inputs failed."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc b/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc index f7ac613bf2..221ccfed3d 100644 --- a/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc @@ -15,6 +15,8 @@ */ #include "src/runtime/kernel/npu/arithmetic_npu.h" +#include +#include #include #include "include/graph/op/all_ops.h" #include "src/kernel_registry.h" @@ -165,6 +167,24 @@ int ArithmeticNPUKernel::SetNPUInputs(const std::vector &inputs, return RET_OK; } +int ArithmeticNPUKernel::SetNPUInputs( + const std::vector &inputs, const std::vector &outputs, + const std::vector &npu_inputs, + const std::unordered_map> &index2_multi_out_index) { + auto ret = SetNPUInputs(inputs, outputs, npu_inputs); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ArithmeticNPUKernel SetNPUInputs failed"; + return RET_ERROR; + } + if (index2_multi_out_index.empty()) { + return RET_OK; + } + for (auto it : index2_multi_out_index) { + MS_LOG(INFO) << name_ << "set input " << it.first << " from " << it.second.first << " output " << it.second.second; + op_->SetInput(it.first, *it.second.first, it.second.second); + } + return RET_OK; +} ge::Operator *mindspore::kernel::ArithmeticNPUKernel::GetNPUOp() { if (activation_type_ == ActivationType_NO_ACTIVATION) { return op_; diff --git a/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.h b/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.h index 9f48009dbe..8c857ed84a 100644 --- a/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.h +++ b/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.h @@ -17,6 +17,8 @@ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ #include +#include +#include #include "nnacl/arithmetic.h" #include "src/runtime/kernel/npu/npu_kernel.h" #include "include/graph/op/all_ops.h" @@ -34,6 +36,9 @@ class ArithmeticNPUKernel : public NPUKernel { OpParameter *opParameter) override; int SetNPUInputs(const std::vector &inputs, const std::vector &outputs, const std::vector &npu_inputs) override; + int SetNPUInputs(const std::vector &inputs, const std::vector &outputs, + const std::vector &npu_inputs, + const std::unordered_map> &index2_multi_out_index) override; ge::Operator *GetNPUOp() override; diff --git a/mindspore/lite/src/runtime/kernel/npu/expand_dims_npu.cc b/mindspore/lite/src/runtime/kernel/npu/expand_dims_npu.cc new file mode 100644 index 0000000000..31a3ee506d --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/npu/expand_dims_npu.cc @@ -0,0 +1,55 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/npu/expand_dims_npu.h" +#include "include/graph/op/all_ops.h" +#include "src/kernel_registry.h" +#include "src/runtime/agent/npu/npu_converter_utils.h" + +using mindspore::kernel::KERNEL_ARCH::kNPU; +using mindspore::lite::KernelRegistrar; +using mindspore::schema::PrimitiveType_ExpandDims; + +namespace mindspore::kernel { +int ExpandDimsNPUKernel::IsSupport(const std::vector &inputs, + const std::vector &outputs, OpParameter *opParameter) { + return RET_OK; +} + +int ExpandDimsNPUKernel::SetNPUInputs(const std::vector &inputs, + const std::vector &outputs, + const std::vector &npu_inputs) { + op_ = new (std::nothrow) hiai::op::ExpandDims(name_); + if (op_ == nullptr) { + MS_LOG(ERROR) << name_ << " op is nullptr"; + return RET_ERROR; + } + op_->set_input_x(*npu_inputs[0]); + op_->set_input_axis(*npu_inputs[1]); + + return RET_OK; +} + +ge::Operator *mindspore::kernel::ExpandDimsNPUKernel::GetNPUOp() { return this->op_; } + +ExpandDimsNPUKernel::~ExpandDimsNPUKernel() { + if (op_ != nullptr) { + delete op_; + op_ = nullptr; + } +} +REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_ExpandDims, NPUKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/npu/expand_dims_npu.h b/mindspore/lite/src/runtime/kernel/npu/expand_dims_npu.h new file mode 100644 index 0000000000..68710305ca --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/npu/expand_dims_npu.h @@ -0,0 +1,42 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_EXPAND_DIMS_NPU_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_EXPAND_DIMS_NPU_H_ +#include +#include "src/runtime/kernel/npu/npu_kernel.h" +#include "include/graph/op/all_ops.h" + +namespace mindspore::kernel { +class ExpandDimsNPUKernel : public NPUKernel { + public: + ExpandDimsNPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : NPUKernel(parameter, inputs, outputs, ctx) {} + ~ExpandDimsNPUKernel() override; + + int IsSupport(const std::vector &inputs, const std::vector &outputs, + OpParameter *opParameter) override; + int SetNPUInputs(const std::vector &inputs, const std::vector &outputs, + const std::vector &npu_inputs) override; + + ge::Operator *GetNPUOp() override; + + private: + hiai::op::ExpandDims *op_ = nullptr; +}; +} // namespace mindspore::kernel +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_EXPAND_DIMS_NPU_H_ diff --git a/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h b/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h index 77ec44ebae..546af30166 100644 --- a/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h +++ b/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h @@ -18,6 +18,8 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_KERNEL_NPU_H_ #include +#include +#include #include "src/lite_kernel.h" #include "include/errorcode.h" #include "include/graph/graph.h" @@ -46,6 +48,14 @@ class NPUKernel : public LiteKernel { virtual int SetNPUInputs(const std::vector &inputs, const std::vector &outputs, const std::vector &npu_inputs) = 0; + virtual int SetNPUInputs(const std::vector &inputs, + const std::vector &outputs, const std::vector &npu_inputs, + const std::unordered_map> &index2_multi_out_index) { + if (index2_multi_out_index.empty()) { + return SetNPUInputs(inputs, outputs, npu_inputs); + } + return RET_OK; + } }; template kernel::LiteKernel *NPUKernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/npu/tile_npu.cc b/mindspore/lite/src/runtime/kernel/npu/tile_npu.cc new file mode 100644 index 0000000000..7153f9eb01 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/npu/tile_npu.cc @@ -0,0 +1,80 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/npu/tile_npu.h" +#include +#include "include/graph/op/all_ops.h" +#include "src/kernel_registry.h" +#include "src/runtime/agent/npu/npu_converter_utils.h" + +using mindspore::kernel::KERNEL_ARCH::kNPU; +using mindspore::lite::KernelRegistrar; +using mindspore::schema::PrimitiveType_TileFusion; + +namespace mindspore::kernel { +int TileNPUKernel::IsSupport(const std::vector &inputs, const std::vector &outputs, + OpParameter *opParameter) { + if (inputs.size() != 2) { + return RET_ERROR; + } + auto multiple_tensor = inputs[1]; + if (multiple_tensor->ElementsNum() > 4) { + return RET_ERROR; + } + int *multiple_data = reinterpret_cast(multiple_tensor->data_c()); + if (multiple_data == nullptr) { + return RET_ERROR; + } + for (int i = 0; i < multiple_tensor->ElementsNum(); ++i) { + param_->multiples_[i] = multiple_data[i]; + } + param_->multiples_size_ = static_cast(multiple_tensor->ElementsNum()); + return RET_OK; +} + +int TileNPUKernel::SetNPUInputs(const std::vector &inputs, const std::vector &outputs, + const std::vector &npu_inputs) { + op_ = new (std::nothrow) hiai::op::Tile(name_); + if (op_ == nullptr) { + MS_LOG(ERROR) << name_ << " op is nullptr"; + return RET_ERROR; + } + op_->set_input_x(*npu_inputs[0]); + + ge::TensorDesc multiple_tensor_desc(ge::Shape({static_cast(param_->multiples_size_)}), ge::FORMAT_NCHW, + ge::DT_INT32); + ge::TensorPtr multiple_tensor = std::make_shared(multiple_tensor_desc); + multiple_tensor->SetData(reinterpret_cast(param_->multiples_), param_->multiples_size_ * sizeof(int)); + multiple_ = new hiai::op::Const(name_ + "multiples"); + multiple_->set_attr_value(multiple_tensor); + op_->set_input_multiples(*multiple_); + return RET_OK; +} + +ge::Operator *mindspore::kernel::TileNPUKernel::GetNPUOp() { return this->op_; } + +TileNPUKernel::~TileNPUKernel() { + if (op_ != nullptr) { + delete op_; + op_ = nullptr; + } + if (multiple_ != nullptr) { + delete multiple_; + multiple_ = nullptr; + } +} +REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_TileFusion, NPUKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/npu/tile_npu.h b/mindspore/lite/src/runtime/kernel/npu/tile_npu.h new file mode 100644 index 0000000000..9975372edd --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/npu/tile_npu.h @@ -0,0 +1,47 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TILE_NPU_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TILE_NPU_H_ +#include +#include "src/runtime/kernel/npu/npu_kernel.h" +#include "include/graph/op/all_ops.h" +#include "nnacl/base/tile_base.h" + +namespace mindspore::kernel { +class TileNPUKernel : public NPUKernel { + public: + TileNPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : NPUKernel(parameter, inputs, outputs, ctx) { + param_ = reinterpret_cast(parameter); + } + ~TileNPUKernel() override; + + int IsSupport(const std::vector &inputs, const std::vector &outputs, + OpParameter *opParameter) override; + int SetNPUInputs(const std::vector &inputs, const std::vector &outputs, + const std::vector &npu_inputs) override; + + ge::Operator *GetNPUOp() override; + + private: + hiai::op::Tile *op_ = nullptr; + hiai::op::Const *multiple_ = nullptr; + TileParameter *param_ = nullptr; +}; +} // namespace mindspore::kernel +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TILE_NPU_H_ diff --git a/mindspore/lite/src/runtime/kernel/npu/transpose_npu.cc b/mindspore/lite/src/runtime/kernel/npu/transpose_npu.cc index c4ac33c6b3..26717ee2e8 100644 --- a/mindspore/lite/src/runtime/kernel/npu/transpose_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/transpose_npu.cc @@ -33,10 +33,12 @@ int TransposeNPUKernel::IsSupport(const std::vector &inputs, con perm_.push_back(static_cast(inputs[1]->data_c())[i]); } } else { - MS_LOG(WARNING) << "NPU perm is attribute."; + MS_LOG(WARNING) << "NPU perm is attribute or input[1] data nullptr"; return RET_ERROR; } - + if (inputs[1]->ElementsNum() != 4) { + return RET_OK; + } return RET_ERROR; }