From 8236921ecd3cbcc0c3e1f032824ac2e9de65449b Mon Sep 17 00:00:00 2001
From: yangruoqi713 <yangruoqi@huawei.com>
Date: Mon, 4 Jan 2021 10:10:31 +0800
Subject: [PATCH] [MSLITE][Develop] Fix conv depthwise indirect bias init and NPU tensor shape checks

---
 mindspore/lite/src/runtime/agent/npu/npu_executor.cc   | 10 +++++-----
 .../arm/fp32/convolution_depthwise_indirect_fp32.cc    |  5 +++--
 mindspore/lite/src/runtime/kernel/npu/npu_kernel.h     |  4 ++++
 mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc   |  4 ----
 4 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
index 81b9b78c9f..b5c579b558 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
@@ -38,16 +38,16 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
 }
 
 bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
+  if (tensor->shape().size() > 4) {
+    MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
+    return false;
+  }
   if (tensor->shape().size() == 4) {
     return tensor->Batch() == npu_tensor->GetTensorDimension().GetNumber() &&
            tensor->Channel() == npu_tensor->GetTensorDimension().GetChannel() &&
            tensor->Height() == npu_tensor->GetTensorDimension().GetHeight() &&
            tensor->Width() == npu_tensor->GetTensorDimension().GetWidth();
   }
-  if (tensor->shape().size() > 4) {
-    MS_LOG(ERROR) << "Npu doesn't support input tensor dims greater than 4";
-    return false;
-  }
   std::vector<int> npu_shape;
   auto dim = tensor->shape().size();
   if (dim > 0) {
@@ -57,7 +57,7 @@ bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tenso
     npu_shape.push_back(npu_tensor->GetTensorDimension().GetChannel());
   }
   if (dim > 2) {
-    npu_shape.push_back(npu_tensor->GetTensorDimension().GetWidth());
+    npu_shape.push_back(npu_tensor->GetTensorDimension().GetHeight());
   }
   return npu_shape == tensor->shape();
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
index 156ceed79a..f07f1422df 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
@@ -68,17 +68,18 @@ int ConvolutionDepthwiseIndirectCPUKernel::InitWeightBias() {
                                     weight_tensor->Batch());
 #endif
 
-  auto bias_tensor = in_tensors_[kBiasIndex];
   bias_data_ = reinterpret_cast<float *>(malloc(batch_flag * div_flag * sizeof(float)));
   if (bias_data_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
   }
 
-  memset(bias_data_, 0, batch_flag * div_flag * sizeof(float));
   if (in_tensors_.size() == kInputSize2) {
+    auto bias_tensor = in_tensors_[kBiasIndex];
     auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData());
     memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
+  } else {
+    memset(bias_data_, 0, batch_flag * div_flag * sizeof(float));
   }
 
   // malloc zero ptr
diff --git a/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h b/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h
index 8e06b4ada1..80d8bf772e 100644
--- a/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h
+++ b/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h
@@ -58,6 +58,10 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive->Type()));
     return nullptr;
   }
+  if (inputs[0]->shape().size() > 4) {
+    MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
+    return nullptr;
+  }
 
   auto *kernel = new (std::nothrow) T(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc b/mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc
index c8cbeecd6b..a502a86109 100644
--- a/mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc
@@ -24,10 +24,6 @@ using mindspore::schema::PrimitiveType_SoftMax;
 namespace mindspore::kernel {
 int SoftmaxNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                 OpParameter *opParameter) {
-  if (inputs[0]->shape().size() > 4) {
-    MS_LOG(ERROR) << "Npu softmax only supports tensor'dim less than 4.";
-    return RET_ERROR;
-  }
   return RET_OK;
 }