From: @zhaozhenlong Reviewed-by: @zhang_xue_tong,@zhanghaibo5 Signed-off-by: @zhang_xue_tongpull/15057/MERGE
| @@ -16,6 +16,8 @@ | |||
| #include "src/runtime/agent/npu/subgraph_npu_kernel.h" | |||
| #include <set> | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include "include/errorcode.h" | |||
| #include "src/runtime/agent/npu/npu_executor.h" | |||
| #include "include/graph/operator.h" | |||
| @@ -34,8 +36,10 @@ using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| static std::set<mindspore::schema::PrimitiveType> npu_specific_weight_nodes = { | |||
| schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_ScaleFusion, | |||
| schema::PrimitiveType_BatchNorm, schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm}; | |||
| schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, | |||
| schema::PrimitiveType_ScaleFusion, schema::PrimitiveType_BatchNorm, | |||
| schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm, | |||
| schema::PrimitiveType_TileFusion, schema::PrimitiveType_PadFusion}; | |||
| SubGraphNpuKernel::~SubGraphNpuKernel() { | |||
| subgraph_input_op_.clear(); | |||
| @@ -95,7 +99,9 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||
| op_buffer_.clear(); | |||
| for (auto node : this->nodes_) { | |||
| std::vector<ge::Operator *> node_input_op; | |||
| for (auto in_tensor : node->in_tensors()) { | |||
| std::unordered_map<int, std::pair<ge::Operator *, int>> index2_multi_out_index; | |||
| for (int i = 0; i < node->in_tensors().size(); ++i) { | |||
| auto in_tensor = node->in_tensors()[i]; | |||
| if (IsSubGraphInputTensor(in_tensor)) { | |||
| auto tensor_name = node->name() + "_" + std::to_string(count++); | |||
| hiai::op::Data *data; | |||
| @@ -109,21 +115,24 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||
| bool is_weight_tensor = true; | |||
| for (auto in_kernel : node->in_kernels()) { | |||
| if (IsContain(in_kernel->out_tensors(), in_tensor)) { | |||
| if (in_kernel->desc().arch == mindspore::kernel::kNPU) { | |||
| // input come from npu | |||
| auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel)->GetNPUOp(); | |||
| if (npu_op != nullptr) { | |||
| node_input_op.push_back(npu_op); | |||
| is_weight_tensor = false; | |||
| break; | |||
| } else { | |||
| MS_LOG(ERROR) << in_kernel->type_str() << "NPU Operator is nullptr."; | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| if (in_kernel->desc().arch != mindspore::kernel::kNPU) { | |||
| MS_LOG(ERROR) << "The input of the intermediate node comes from the CPU"; | |||
| return RET_ERROR; | |||
| } | |||
| // input come from npu | |||
| auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel)->GetNPUOp(); | |||
| if (npu_op == nullptr) { | |||
| MS_LOG(ERROR) << in_kernel->type_str() << "NPU Operator is nullptr."; | |||
| return RET_ERROR; | |||
| } | |||
| node_input_op.push_back(npu_op); | |||
| if (in_kernel->out_tensors().size() != 1) { // in_kernel has multi output, we record which output we want. | |||
| int out_index = std::find(in_kernel->out_tensors().begin(), in_kernel->out_tensors().end(), in_tensor) - | |||
| in_kernel->out_tensors().begin(); | |||
| index2_multi_out_index[i] = {npu_op, out_index}; | |||
| } | |||
| is_weight_tensor = false; | |||
| break; | |||
| } | |||
| } | |||
| @@ -144,7 +153,8 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||
| } | |||
| } | |||
| // set input to NPU | |||
| int ret = reinterpret_cast<NPUKernel *>(node)->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op); | |||
| int ret = reinterpret_cast<NPUKernel *>(node)->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op, | |||
| index2_multi_out_index); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << node->name() << " set npu inputs failed."; | |||
| return RET_ERROR; | |||
| @@ -15,6 +15,8 @@ | |||
| */ | |||
| #include "src/runtime/kernel/npu/arithmetic_npu.h" | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include <string> | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "src/kernel_registry.h" | |||
| @@ -165,6 +167,24 @@ int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| return RET_OK; | |||
| } | |||
| int ArithmeticNPUKernel::SetNPUInputs( | |||
| const std::vector<mindspore::lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs, | |||
| const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) { | |||
| auto ret = SetNPUInputs(inputs, outputs, npu_inputs); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ArithmeticNPUKernel SetNPUInputs failed"; | |||
| return RET_ERROR; | |||
| } | |||
| if (index2_multi_out_index.empty()) { | |||
| return RET_OK; | |||
| } | |||
| for (auto it : index2_multi_out_index) { | |||
| MS_LOG(INFO) << name_ << "set input " << it.first << " from " << it.second.first << " output " << it.second.second; | |||
| op_->SetInput(it.first, *it.second.first, it.second.second); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ArithmeticNPUKernel::GetNPUOp() { | |||
| if (activation_type_ == ActivationType_NO_ACTIVATION) { | |||
| return op_; | |||
| @@ -17,6 +17,8 @@ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | |||
| #include <vector> | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include "nnacl/arithmetic.h" | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| @@ -34,6 +36,9 @@ class ArithmeticNPUKernel : public NPUKernel { | |||
| OpParameter *opParameter) override; | |||
| int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) override; | |||
| int SetNPUInputs(const std::vector<mindspore::lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs, | |||
| const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override; | |||
| ge::Operator *GetNPUOp() override; | |||
| @@ -0,0 +1,55 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/npu/expand_dims_npu.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kNPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_ExpandDims; | |||
| namespace mindspore::kernel { | |||
// ExpandDims has no attribute or shape restrictions on NPU, so it is always
// reported as supported; the inputs/outputs/opParameter are intentionally
// unused here.
int ExpandDimsNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                   const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
  return RET_OK;
}
| int ExpandDimsNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| op_ = new (std::nothrow) hiai::op::ExpandDims(name_); | |||
| if (op_ == nullptr) { | |||
| MS_LOG(ERROR) << name_ << " op is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| op_->set_input_x(*npu_inputs[0]); | |||
| op_->set_input_axis(*npu_inputs[1]); | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ExpandDimsNPUKernel::GetNPUOp() { return this->op_; } | |||
| ExpandDimsNPUKernel::~ExpandDimsNPUKernel() { | |||
| if (op_ != nullptr) { | |||
| delete op_; | |||
| op_ = nullptr; | |||
| } | |||
| } | |||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_ExpandDims, NPUKernelCreator<ExpandDimsNPUKernel>) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,42 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_EXPAND_DIMS_NPU_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_EXPAND_DIMS_NPU_H_ | |||
| #include <vector> | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| namespace mindspore::kernel { | |||
// NPU kernel wrapping the HiAI ExpandDims operator (inserts a dimension of
// size 1 at a given axis). Registered for float32 PrimitiveType_ExpandDims.
class ExpandDimsNPUKernel : public NPUKernel {
 public:
  ExpandDimsNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                      const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : NPUKernel(parameter, inputs, outputs, ctx) {}
  ~ExpandDimsNPUKernel() override;
  // Always returns RET_OK: ExpandDims has no NPU-specific restrictions.
  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  // Builds the hiai::op::ExpandDims and wires x / axis from npu_inputs.
  int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                   const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::ExpandDims *op_ = nullptr;  // owned; released in the destructor
};
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_EXPAND_DIMS_NPU_H_ | |||
| @@ -18,6 +18,8 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_KERNEL_NPU_H_ | |||
| #include <vector> | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include "src/lite_kernel.h" | |||
| #include "include/errorcode.h" | |||
| #include "include/graph/graph.h" | |||
| @@ -46,6 +48,14 @@ class NPUKernel : public LiteKernel { | |||
| virtual int SetNPUInputs(const std::vector<mindspore::lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) = 0; | |||
  // Default overload for kernels that are unaware of multi-output
  // predecessors: when no multi-output mapping is required, delegate to the
  // plain three-argument SetNPUInputs.
  // NOTE(review): when index2_multi_out_index is NON-empty this default
  // returns RET_OK without wiring any inputs at all — kernels whose inputs
  // can come from a multi-output producer must override this overload (see
  // ArithmeticNPUKernel). Confirm the subgraph builder only passes a
  // non-empty map to kernels that override it.
  virtual int SetNPUInputs(const std::vector<mindspore::lite::Tensor *> &inputs,
                           const std::vector<lite::Tensor *> &outputs, const std::vector<ge::Operator *> &npu_inputs,
                           const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
    if (index2_multi_out_index.empty()) {
      return SetNPUInputs(inputs, outputs, npu_inputs);
    }
    return RET_OK;
  }
| }; | |||
| template <class T> | |||
| kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -0,0 +1,80 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#include "src/runtime/kernel/npu/tile_npu.h"
#include <memory>
#include <new>
#include "include/graph/op/all_ops.h"
#include "src/kernel_registry.h"
#include "src/runtime/agent/npu/npu_converter_utils.h"
| using mindspore::kernel::KERNEL_ARCH::kNPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_TileFusion; | |||
| namespace mindspore::kernel { | |||
| int TileNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| OpParameter *opParameter) { | |||
| if (inputs.size() != 2) { | |||
| return RET_ERROR; | |||
| } | |||
| auto multiple_tensor = inputs[1]; | |||
| if (multiple_tensor->ElementsNum() > 4) { | |||
| return RET_ERROR; | |||
| } | |||
| int *multiple_data = reinterpret_cast<int *>(multiple_tensor->data_c()); | |||
| if (multiple_data == nullptr) { | |||
| return RET_ERROR; | |||
| } | |||
| for (int i = 0; i < multiple_tensor->ElementsNum(); ++i) { | |||
| param_->multiples_[i] = multiple_data[i]; | |||
| } | |||
| param_->multiples_size_ = static_cast<size_t>(multiple_tensor->ElementsNum()); | |||
| return RET_OK; | |||
| } | |||
| int TileNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| op_ = new (std::nothrow) hiai::op::Tile(name_); | |||
| if (op_ == nullptr) { | |||
| MS_LOG(ERROR) << name_ << " op is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| op_->set_input_x(*npu_inputs[0]); | |||
| ge::TensorDesc multiple_tensor_desc(ge::Shape({static_cast<int64_t>(param_->multiples_size_)}), ge::FORMAT_NCHW, | |||
| ge::DT_INT32); | |||
| ge::TensorPtr multiple_tensor = std::make_shared<hiai::Tensor>(multiple_tensor_desc); | |||
| multiple_tensor->SetData(reinterpret_cast<uint8_t *>(param_->multiples_), param_->multiples_size_ * sizeof(int)); | |||
| multiple_ = new hiai::op::Const(name_ + "multiples"); | |||
| multiple_->set_attr_value(multiple_tensor); | |||
| op_->set_input_multiples(*multiple_); | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::TileNPUKernel::GetNPUOp() { return this->op_; } | |||
| TileNPUKernel::~TileNPUKernel() { | |||
| if (op_ != nullptr) { | |||
| delete op_; | |||
| op_ = nullptr; | |||
| } | |||
| if (multiple_ != nullptr) { | |||
| delete multiple_; | |||
| multiple_ = nullptr; | |||
| } | |||
| } | |||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_TileFusion, NPUKernelCreator<TileNPUKernel>) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,47 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TILE_NPU_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TILE_NPU_H_ | |||
| #include <vector> | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "nnacl/base/tile_base.h" | |||
| namespace mindspore::kernel { | |||
// NPU kernel wrapping the HiAI Tile operator. The TileParameter is shared
// with the CPU implementation; IsSupport caches the (const, <=4-element)
// multiples into it and SetNPUInputs turns them into a Const input op.
class TileNPUKernel : public NPUKernel {
 public:
  TileNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : NPUKernel(parameter, inputs, outputs, ctx) {
    param_ = reinterpret_cast<TileParameter *>(parameter);
  }
  ~TileNPUKernel() override;
  // Rejects graphs whose multiples input is missing, non-const, or longer
  // than the 4 dimensions HiAI Tile supports.
  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                   const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Tile *op_ = nullptr;         // owned; released in the destructor
  hiai::op::Const *multiple_ = nullptr;  // owned const op holding the multiples
  TileParameter *param_ = nullptr;       // non-owning view of the OpParameter
};
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TILE_NPU_H_ | |||
| @@ -33,10 +33,12 @@ int TransposeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, con | |||
| perm_.push_back(static_cast<int *>(inputs[1]->data_c())[i]); | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "NPU perm is attribute."; | |||
| MS_LOG(WARNING) << "NPU perm is attribute or input[1] data nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| if (inputs[1]->ElementsNum() != 4) { | |||
| return RET_OK; | |||
| } | |||
| return RET_ERROR; | |||
| } | |||