From: @zhaozhenlong Reviewed-by: @zhang_xue_tong,@zhanghaibo5 Signed-off-by: @zhang_xue_tongpull/15057/MERGE
| @@ -16,6 +16,8 @@ | |||
| #include "src/runtime/agent/npu/subgraph_npu_kernel.h" | |||
| #include <set> | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include "include/errorcode.h" | |||
| #include "src/runtime/agent/npu/npu_executor.h" | |||
| #include "include/graph/operator.h" | |||
| @@ -34,8 +36,10 @@ using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| static std::set<mindspore::schema::PrimitiveType> npu_specific_weight_nodes = { | |||
| schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_ScaleFusion, | |||
| schema::PrimitiveType_BatchNorm, schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm}; | |||
| schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, | |||
| schema::PrimitiveType_ScaleFusion, schema::PrimitiveType_BatchNorm, | |||
| schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm, | |||
| schema::PrimitiveType_TileFusion, schema::PrimitiveType_PadFusion}; | |||
| SubGraphNpuKernel::~SubGraphNpuKernel() { | |||
| subgraph_input_op_.clear(); | |||
| @@ -95,7 +99,9 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||
| op_buffer_.clear(); | |||
| for (auto node : this->nodes_) { | |||
| std::vector<ge::Operator *> node_input_op; | |||
| for (auto in_tensor : node->in_tensors()) { | |||
| std::unordered_map<int, std::pair<ge::Operator *, int>> index2_multi_out_index; | |||
| for (int i = 0; i < node->in_tensors().size(); ++i) { | |||
| auto in_tensor = node->in_tensors()[i]; | |||
| if (IsSubGraphInputTensor(in_tensor)) { | |||
| auto tensor_name = node->name() + "_" + std::to_string(count++); | |||
| hiai::op::Data *data; | |||
| @@ -109,21 +115,24 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||
| bool is_weight_tensor = true; | |||
| for (auto in_kernel : node->in_kernels()) { | |||
| if (IsContain(in_kernel->out_tensors(), in_tensor)) { | |||
| if (in_kernel->desc().arch == mindspore::kernel::kNPU) { | |||
| // input come from npu | |||
| auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel)->GetNPUOp(); | |||
| if (npu_op != nullptr) { | |||
| node_input_op.push_back(npu_op); | |||
| is_weight_tensor = false; | |||
| break; | |||
| } else { | |||
| MS_LOG(ERROR) << in_kernel->type_str() << "NPU Operator is nullptr."; | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| if (in_kernel->desc().arch != mindspore::kernel::kNPU) { | |||
| MS_LOG(ERROR) << "The input of the intermediate node comes from the CPU"; | |||
| return RET_ERROR; | |||
| } | |||
| // input come from npu | |||
| auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel)->GetNPUOp(); | |||
| if (npu_op == nullptr) { | |||
| MS_LOG(ERROR) << in_kernel->type_str() << "NPU Operator is nullptr."; | |||
| return RET_ERROR; | |||
| } | |||
| node_input_op.push_back(npu_op); | |||
| if (in_kernel->out_tensors().size() != 1) { // in_kernel has multi output, we record which output we want. | |||
| int out_index = std::find(in_kernel->out_tensors().begin(), in_kernel->out_tensors().end(), in_tensor) - | |||
| in_kernel->out_tensors().begin(); | |||
| index2_multi_out_index[i] = {npu_op, out_index}; | |||
| } | |||
| is_weight_tensor = false; | |||
| break; | |||
| } | |||
| } | |||
| @@ -144,7 +153,8 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||
| } | |||
| } | |||
| // set input to NPU | |||
| int ret = reinterpret_cast<NPUKernel *>(node)->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op); | |||
| int ret = reinterpret_cast<NPUKernel *>(node)->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op, | |||
| index2_multi_out_index); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << node->name() << " set npu inputs failed."; | |||
| return RET_ERROR; | |||
| @@ -15,6 +15,8 @@ | |||
| */ | |||
| #include "src/runtime/kernel/npu/arithmetic_npu.h" | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include <string> | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "src/kernel_registry.h" | |||
| @@ -165,6 +167,24 @@ int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| return RET_OK; | |||
| } | |||
| int ArithmeticNPUKernel::SetNPUInputs( | |||
| const std::vector<mindspore::lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs, | |||
| const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) { | |||
| auto ret = SetNPUInputs(inputs, outputs, npu_inputs); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ArithmeticNPUKernel SetNPUInputs failed"; | |||
| return RET_ERROR; | |||
| } | |||
| if (index2_multi_out_index.empty()) { | |||
| return RET_OK; | |||
| } | |||
| for (auto it : index2_multi_out_index) { | |||
| MS_LOG(INFO) << name_ << "set input " << it.first << " from " << it.second.first << " output " << it.second.second; | |||
| op_->SetInput(it.first, *it.second.first, it.second.second); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ArithmeticNPUKernel::GetNPUOp() { | |||
| if (activation_type_ == ActivationType_NO_ACTIVATION) { | |||
| return op_; | |||
| @@ -17,6 +17,8 @@ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_ | |||
| #include <vector> | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include "nnacl/arithmetic.h" | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| @@ -34,6 +36,9 @@ class ArithmeticNPUKernel : public NPUKernel { | |||
| OpParameter *opParameter) override; | |||
| int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) override; | |||
| int SetNPUInputs(const std::vector<mindspore::lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs, | |||
| const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override; | |||
| ge::Operator *GetNPUOp() override; | |||
| @@ -0,0 +1,55 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/npu/expand_dims_npu.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kNPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_ExpandDims; | |||
| namespace mindspore::kernel { | |||
// ExpandDims has no attribute or shape restrictions on NPU, so it is always
// reported as supported; the inputs/outputs/opParameter are intentionally
// unused here.
int ExpandDimsNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                   const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
  return RET_OK;
}
| int ExpandDimsNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| op_ = new (std::nothrow) hiai::op::ExpandDims(name_); | |||
| if (op_ == nullptr) { | |||
| MS_LOG(ERROR) << name_ << " op is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| op_->set_input_x(*npu_inputs[0]); | |||
| op_->set_input_axis(*npu_inputs[1]); | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ExpandDimsNPUKernel::GetNPUOp() { return this->op_; } | |||
| ExpandDimsNPUKernel::~ExpandDimsNPUKernel() { | |||
| if (op_ != nullptr) { | |||
| delete op_; | |||
| op_ = nullptr; | |||
| } | |||
| } | |||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_ExpandDims, NPUKernelCreator<ExpandDimsNPUKernel>) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,42 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_EXPAND_DIMS_NPU_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_EXPAND_DIMS_NPU_H_ | |||
| #include <vector> | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| namespace mindspore::kernel { | |||
// NPU kernel wrapping the HiAI ExpandDims operator (inserts a dimension of
// size 1 at a given axis). Registered for float32 PrimitiveType_ExpandDims.
class ExpandDimsNPUKernel : public NPUKernel {
 public:
  ExpandDimsNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                      const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : NPUKernel(parameter, inputs, outputs, ctx) {}
  ~ExpandDimsNPUKernel() override;
  // Always returns RET_OK: ExpandDims has no NPU-specific restrictions.
  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  // Builds the hiai::op::ExpandDims and wires x / axis from npu_inputs.
  int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                   const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::ExpandDims *op_ = nullptr;  // owned; released in the destructor
};
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_EXPAND_DIMS_NPU_H_ | |||
| @@ -18,6 +18,8 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_KERNEL_NPU_H_ | |||
| #include <vector> | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include "src/lite_kernel.h" | |||
| #include "include/errorcode.h" | |||
| #include "include/graph/graph.h" | |||
| @@ -46,6 +48,14 @@ class NPUKernel : public LiteKernel { | |||
| virtual int SetNPUInputs(const std::vector<mindspore::lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) = 0; | |||
  // Default overload for kernels that are unaware of multi-output
  // predecessors: when no multi-output mapping is required, delegate to the
  // plain three-argument SetNPUInputs.
  // NOTE(review): when index2_multi_out_index is NON-empty this default
  // returns RET_OK without wiring any inputs at all — kernels whose inputs
  // can come from a multi-output producer must override this overload (see
  // ArithmeticNPUKernel). Confirm the subgraph builder only passes a
  // non-empty map to kernels that override it.
  virtual int SetNPUInputs(const std::vector<mindspore::lite::Tensor *> &inputs,
                           const std::vector<lite::Tensor *> &outputs, const std::vector<ge::Operator *> &npu_inputs,
                           const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
    if (index2_multi_out_index.empty()) {
      return SetNPUInputs(inputs, outputs, npu_inputs);
    }
    return RET_OK;
  }
| }; | |||
| template <class T> | |||
| kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -0,0 +1,80 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#include "src/runtime/kernel/npu/tile_npu.h"
#include <memory>
#include <new>
#include "include/graph/op/all_ops.h"
#include "src/kernel_registry.h"
#include "src/runtime/agent/npu/npu_converter_utils.h"
| using mindspore::kernel::KERNEL_ARCH::kNPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_TileFusion; | |||
| namespace mindspore::kernel { | |||
| int TileNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| OpParameter *opParameter) { | |||
| if (inputs.size() != 2) { | |||
| return RET_ERROR; | |||
| } | |||
| auto multiple_tensor = inputs[1]; | |||
| if (multiple_tensor->ElementsNum() > 4) { | |||
| return RET_ERROR; | |||
| } | |||
| int *multiple_data = reinterpret_cast<int *>(multiple_tensor->data_c()); | |||
| if (multiple_data == nullptr) { | |||
| return RET_ERROR; | |||
| } | |||
| for (int i = 0; i < multiple_tensor->ElementsNum(); ++i) { | |||
| param_->multiples_[i] = multiple_data[i]; | |||
| } | |||
| param_->multiples_size_ = static_cast<size_t>(multiple_tensor->ElementsNum()); | |||
| return RET_OK; | |||
| } | |||
| int TileNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<ge::Operator *> &npu_inputs) { | |||
| op_ = new (std::nothrow) hiai::op::Tile(name_); | |||
| if (op_ == nullptr) { | |||
| MS_LOG(ERROR) << name_ << " op is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| op_->set_input_x(*npu_inputs[0]); | |||
| ge::TensorDesc multiple_tensor_desc(ge::Shape({static_cast<int64_t>(param_->multiples_size_)}), ge::FORMAT_NCHW, | |||
| ge::DT_INT32); | |||
| ge::TensorPtr multiple_tensor = std::make_shared<hiai::Tensor>(multiple_tensor_desc); | |||
| multiple_tensor->SetData(reinterpret_cast<uint8_t *>(param_->multiples_), param_->multiples_size_ * sizeof(int)); | |||
| multiple_ = new hiai::op::Const(name_ + "multiples"); | |||
| multiple_->set_attr_value(multiple_tensor); | |||
| op_->set_input_multiples(*multiple_); | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::TileNPUKernel::GetNPUOp() { return this->op_; } | |||
| TileNPUKernel::~TileNPUKernel() { | |||
| if (op_ != nullptr) { | |||
| delete op_; | |||
| op_ = nullptr; | |||
| } | |||
| if (multiple_ != nullptr) { | |||
| delete multiple_; | |||
| multiple_ = nullptr; | |||
| } | |||
| } | |||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_TileFusion, NPUKernelCreator<TileNPUKernel>) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,47 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TILE_NPU_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TILE_NPU_H_ | |||
| #include <vector> | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "nnacl/base/tile_base.h" | |||
| namespace mindspore::kernel { | |||
// NPU kernel wrapping the HiAI Tile operator. The TileParameter is shared
// with the CPU implementation; IsSupport caches the (const, <=4-element)
// multiples into it and SetNPUInputs turns them into a Const input op.
class TileNPUKernel : public NPUKernel {
 public:
  TileNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : NPUKernel(parameter, inputs, outputs, ctx) {
    param_ = reinterpret_cast<TileParameter *>(parameter);
  }
  ~TileNPUKernel() override;
  // Rejects graphs whose multiples input is missing, non-const, or longer
  // than the 4 dimensions HiAI Tile supports.
  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                   const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Tile *op_ = nullptr;         // owned; released in the destructor
  hiai::op::Const *multiple_ = nullptr;  // owned const op holding the multiples
  TileParameter *param_ = nullptr;       // non-owning view of the OpParameter
};
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TILE_NPU_H_ | |||
| @@ -33,10 +33,12 @@ int TransposeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, con | |||
| perm_.push_back(static_cast<int *>(inputs[1]->data_c())[i]); | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "NPU perm is attribute."; | |||
| MS_LOG(WARNING) << "NPU perm is attribute or input[1] data nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| if (inputs[1]->ElementsNum() != 4) { | |||
| return RET_OK; | |||
| } | |||
| return RET_ERROR; | |||
| } | |||