From 812b6b9c101d9fd7cccc30f25062e8537dc5e84d Mon Sep 17 00:00:00 2001
From: zhaozhenlong
Date: Wed, 14 Apr 2021 15:01:25 +0800
Subject: [PATCH] npu add CropAndResize op

transform pass target input fix
npu executor same input shape check fix
---
 .../src/runtime/agent/npu/npu_executor.cc     | 17 ++++-
 .../agent/npu/optimizer/npu_transform_pass.cc | 17 +++--
 .../runtime/kernel/npu/crop_and_resize_npu.cc | 73 +++++++++++++++++++
 .../runtime/kernel/npu/crop_and_resize_npu.h  | 45 ++++++++++++
 4 files changed, 146 insertions(+), 6 deletions(-)
 create mode 100644 mindspore/lite/src/runtime/kernel/npu/crop_and_resize_npu.cc
 create mode 100644 mindspore/lite/src/runtime/kernel/npu/crop_and_resize_npu.h

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
index 99e47c5887..384e61b71a 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
@@ -62,6 +62,17 @@ std::vector<int> GetNpuTensorShape(int dim, std::shared_ptr<hiai::AiTensor> npu_tensor) {
   return npu_shape;
 }
 
+std::vector<int> ExpandShapeTo4d(const std::vector<int> &shape) {
+  if (shape.size() == 0 || shape.size() >= 4) {
+    return shape;
+  }
+  std::vector<int> ret{shape};
+  for (auto i = shape.size(); i < 4; ++i) {
+    ret.push_back(1);
+  }
+  return ret;
+}
+
 bool IsSameShapeInTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
   if (tensor->shape().size() > 4) {
     MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
@@ -73,7 +84,11 @@ bool IsSameShapeInTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
            tensor->Height() == npu_tensor->GetTensorDimension().GetHeight() &&
            tensor->Width() == npu_tensor->GetTensorDimension().GetWidth();
   }
-  return GetNpuTensorShape(tensor->shape().size(), npu_tensor) == tensor->shape();
+  std::vector<int> npu_shape{static_cast<int>(npu_tensor->GetTensorDimension().GetNumber()),
+                             static_cast<int>(npu_tensor->GetTensorDimension().GetChannel()),
+                             static_cast<int>(npu_tensor->GetTensorDimension().GetHeight()),
+                             static_cast<int>(npu_tensor->GetTensorDimension().GetWidth())};
+  return ExpandShapeTo4d(tensor->shape()) == npu_shape;
 }
 
 bool IsSameShapeOutTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc
index 8a2855a257..f83d6f1c0c 100644
--- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc
@@ -23,16 +23,23 @@ using kernel::KERNEL_ARCH::kNPU;
 std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
   schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_Resize,
-  schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion, schema::PrimitiveType_ScaleFusion};
+  schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion, schema::PrimitiveType_ScaleFusion,
+  schema::PrimitiveType_CropAndResize};
 
 int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels) {
   bool is_input_kernel = kernel->in_kernels().empty();
-  // single input
-  if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU ||
-      npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) {
+  // Not always a single input (e.g. CropAndResize); we only care about the input kernel whose output is 4-dim.
+  auto it = std::find_if(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *k) {
+    return k->out_tensors().size() > 0 && k->out_tensors()[0]->shape().size() == 4;
+  });
+  if (!is_input_kernel && it == kernel->in_kernels().end()) {
+    MS_LOG(ERROR) << "NPU transform pass cannot find an input kernel with 4-dim output";
+    return RET_ERROR;
+  }
+  if (is_input_kernel || (*it)->desc().arch != kNPU || npu_trans_nodes.find((*it)->Type()) == npu_trans_nodes.end()) {
     kernel::LiteKernel *pre_kernel = nullptr;
     if (!is_input_kernel) {
-      pre_kernel = kernel->in_kernels()[0];
+      pre_kernel = *it;
     }
     // Create pre transform kernel's out tensor.
diff --git a/mindspore/lite/src/runtime/kernel/npu/crop_and_resize_npu.cc b/mindspore/lite/src/runtime/kernel/npu/crop_and_resize_npu.cc
new file mode 100644
index 0000000000..eb1c6bb1d1
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/npu/crop_and_resize_npu.cc
@@ -0,0 +1,73 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/npu/crop_and_resize_npu.h"
+#include "src/kernel_registry.h"
+
+using mindspore::kernel::KERNEL_ARCH::kNPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::schema::PrimitiveType_CropAndResize;
+
+namespace mindspore::kernel {
+int CropAndResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
+                                      const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
+  // Only method 0 (bilinear) and method 1 (nearest) are supported.
+  if (param_->method_ != 0 && param_->method_ != 1) {
+    MS_LOG(WARNING) << "NPU CropAndResize only supports method bilinear (0) and nearest (1), got " << param_->method_;
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int CropAndResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
+                                         const std::vector<lite::Tensor *> &outputs,
+                                         const std::vector<ge::Operator *> &npu_inputs) {
+  op_ = new (std::nothrow) hiai::op::CropAndResize(name_);
+  if (op_ == nullptr) {
+    MS_LOG(ERROR) << name_ << " op is nullptr";
+    return RET_ERROR;
+  }
+  if (npu_inputs.size() < 4) {
+    MS_LOG(ERROR) << "NPU CropAndResize got npu inputs size < 4";
+    return RET_ERROR;
+  }
+  op_->set_input_x(*npu_inputs[0]);
+  op_->set_input_boxes(*npu_inputs[1]);
+  op_->set_input_box_index(*npu_inputs[2]);
+  op_->set_input_crop_size(*npu_inputs[3]);
+  op_->set_attr_extrapolation_value(param_->extrapolation_value_);
+  if (param_->method_ == 0) {
+    op_->set_attr_method("bilinear");
+  } else if (param_->method_ == 1) {
+    op_->set_attr_method("nearest");
+  } else {
+    MS_LOG(ERROR) << "NPU CropAndResize only supports method bilinear and nearest";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+ge::Operator *mindspore::kernel::CropAndResizeNPUKernel::GetNPUOp() { return this->op_; }
+
+CropAndResizeNPUKernel::~CropAndResizeNPUKernel() {
+  if (op_ != nullptr) {
+    delete op_;
+    op_ = nullptr;
+  }
+}
+
+REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_CropAndResize, NPUKernelCreator<CropAndResizeNPUKernel>)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/npu/crop_and_resize_npu.h b/mindspore/lite/src/runtime/kernel/npu/crop_and_resize_npu.h
new file mode 100644
index 0000000000..d7a41e5037
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/npu/crop_and_resize_npu.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CROP_AND_RESIZE_NPU_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CROP_AND_RESIZE_NPU_H_
+#include <vector>
+#include "nnacl/resize_parameter.h"
+#include "src/runtime/kernel/npu/npu_kernel.h"
+#include "include/graph/op/all_ops.h"
+
+namespace mindspore::kernel {
+class CropAndResizeNPUKernel : public NPUKernel {
+ public:
+  CropAndResizeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                         const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
+      : NPUKernel(parameter, inputs, outputs, ctx) {
+    param_ = reinterpret_cast<CropAndResizeParameter *>(parameter);
+  }
+  ~CropAndResizeNPUKernel() override;
+
+  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
+                OpParameter *opParameter) override;
+  int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
+                   const std::vector<ge::Operator *> &npu_inputs) override;
+  ge::Operator *GetNPUOp() override;
+
+ private:
+  hiai::op::CropAndResize *op_ = nullptr;
+  CropAndResizeParameter *param_;
+};
+}  // namespace mindspore::kernel
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CROP_AND_RESIZE_NPU_H_
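
A minimal standalone sketch (not part of the patch; the file name expand_shape_sketch.cc is hypothetical) of the input-shape check this change adds to IsSameShapeInTensor: a lite tensor shape with fewer than 4 dims is padded with trailing 1s and compared against the NPU tensor's NCHW dimensions. The npu_shape vector below is only a stand-in for the values reported by GetNumber()/GetChannel()/GetHeight()/GetWidth(); the padding logic mirrors the ExpandShapeTo4d helper in the patch.

// expand_shape_sketch.cc - illustration only, no hiai dependencies.
#include <iostream>
#include <vector>

// Same padding rule as the ExpandShapeTo4d helper added by the patch.
std::vector<int> ExpandShapeTo4d(const std::vector<int> &shape) {
  if (shape.size() == 0 || shape.size() >= 4) {
    return shape;  // empty or already >= 4 dims: returned unchanged
  }
  std::vector<int> ret{shape};
  for (auto i = shape.size(); i < 4; ++i) {
    ret.push_back(1);  // pad trailing dims with 1
  }
  return ret;
}

int main() {
  std::vector<int> tensor_shape{1, 3, 8};  // 3-dim shape of the lite tensor
  std::vector<int> npu_shape{1, 3, 8, 1};  // stand-in for the NPU tensor's N/C/H/W dims
  // Equivalent to the new return statement in IsSameShapeInTensor.
  std::cout << std::boolalpha << (ExpandShapeTo4d(tensor_shape) == npu_shape) << std::endl;  // prints: true
  return 0;
}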