
!10908 [MSLITE][Develop] fix bug of conv depthwise indirect buffer

From: @yangruoqi713
Reviewed-by: @hangangqiang, @zhang_xue_tong
Signed-off-by: @zhang_xue_tong
tags/v1.2.0-rc1
mindspore-ci-bot committed on Gitee, 5 years ago
Commit: 909dcb4051
4 changed files with 12 additions and 11 deletions:

  1. mindspore/lite/src/runtime/agent/npu/npu_executor.cc (+5, -5)
  2. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc (+3, -2)
  3. mindspore/lite/src/runtime/kernel/npu/npu_kernel.h (+4, -0)
  4. mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc (+0, -4)

mindspore/lite/src/runtime/agent/npu/npu_executor.cc (+5, -5)

@@ -38,16 +38,16 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
 }
 
 bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
+  if (tensor->shape().size() > 4) {
+    MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
+    return false;
+  }
   if (tensor->shape().size() == 4) {
     return tensor->Batch() == npu_tensor->GetTensorDimension().GetNumber() &&
            tensor->Channel() == npu_tensor->GetTensorDimension().GetChannel() &&
            tensor->Height() == npu_tensor->GetTensorDimension().GetHeight() &&
            tensor->Width() == npu_tensor->GetTensorDimension().GetWidth();
   }
-  if (tensor->shape().size() > 4) {
-    MS_LOG(ERROR) << "Npu doesn't support input tensor dims greater than 4";
-    return false;
-  }
   std::vector<int> npu_shape;
   auto dim = tensor->shape().size();
   if (dim > 0) {
@@ -57,7 +57,7 @@ bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
     npu_shape.push_back(npu_tensor->GetTensorDimension().GetChannel());
   }
   if (dim > 2) {
-    npu_shape.push_back(npu_tensor->GetTensorDimension().GetWidth());
+    npu_shape.push_back(npu_tensor->GetTensorDimension().GetHeight());
   }
   return npu_shape == tensor->shape();
 }
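
The two hunks above do two things: the more-than-4-dims guard is hoisted to the top of IsSameShapeTensor (with the message wording aligned to the new check in npu_kernel.h), and the low-rank comparison is fixed to match the tensor's third dimension against the NPU Height instead of Width. A minimal standalone sketch of the corrected low-rank path, using a hypothetical NpuDims struct in place of hiai::AiTensor's dimension object:

#include <vector>

// Hypothetical stand-in for the HiAI NCHW dimension quad.
struct NpuDims { int n, c, h, w; };

// Sketch of the fixed sub-4-D comparison: a k-dim shape (k < 4) is matched
// against the first k entries of (n, c, h), so the third entry must be the
// NPU Height -- comparing it against Width was the bug fixed above.
bool SameLowRankShape(const std::vector<int> &shape, const NpuDims &d) {
  if (shape.size() >= 4) return false;  // 4-D uses the accessor-based path above
  std::vector<int> npu_shape;
  auto dim = shape.size();
  if (dim > 0) npu_shape.push_back(d.n);
  if (dim > 1) npu_shape.push_back(d.c);
  if (dim > 2) npu_shape.push_back(d.h);  // was d.w before the fix
  return npu_shape == shape;
}

int main() {
  // A 3-D tensor (2, 3, 5) held on the NPU as N=2, C=3, H=5, W=1: the old
  // Width-based comparison saw (2, 3, 1) and wrongly reported a mismatch.
  NpuDims d{2, 3, 5, 1};
  return SameLowRankShape({2, 3, 5}, d) ? 0 : 1;
}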


mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc (+3, -2)

@@ -68,17 +68,18 @@ int ConvolutionDepthwiseIndirectCPUKernel::InitWeightBias() {
                    weight_tensor->Batch());
 #endif
 
-  auto bias_tensor = in_tensors_[kBiasIndex];
   bias_data_ = reinterpret_cast<float *>(malloc(batch_flag * div_flag * sizeof(float)));
   if (bias_data_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
   }
-
+  memset(bias_data_, 0, batch_flag * div_flag * sizeof(float));
   if (in_tensors_.size() == kInputSize2) {
+    auto bias_tensor = in_tensors_[kBiasIndex];
     auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData());
     memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
   } else {
     memset(bias_data_, 0, batch_flag * div_flag * sizeof(float));
   }
+
   // malloc zero ptr
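
The bias buffer here is padded: it holds batch_flag * div_flag floats, while a real bias tensor supplies only ElementsNum() of them. Before this change the buffer was zeroed only when no bias tensor was given, so with a bias present the padding tail beyond ElementsNum() kept whatever garbage malloc returned. The fix zeroes the whole buffer before copying the bias over it. A minimal sketch of the pattern, with hypothetical names (padded standing in for batch_flag * div_flag, elements for ElementsNum()):

#include <cstdlib>
#include <cstring>

// Allocate a padded bias buffer, zero all of it, then overlay the real bias.
// Zeroing first is what makes the tail [elements, padded) well defined.
float *InitPaddedBias(const float *ori_bias, size_t elements, size_t padded) {
  float *bias = static_cast<float *>(malloc(padded * sizeof(float)));
  if (bias == nullptr) {
    return nullptr;  // caller logs and returns an error, as the kernel does
  }
  memset(bias, 0, padded * sizeof(float));
  if (ori_bias != nullptr) {
    memcpy(bias, ori_bias, elements * sizeof(float));  // elements <= padded
  }
  return bias;
}

int main() {
  const float ori[3] = {1.0f, 2.0f, 3.0f};
  float *bias = InitPaddedBias(ori, 3, 8);  // bias[3..7] is now zero, not garbage
  int ok = bias != nullptr;
  free(bias);
  return ok ? 0 : 1;
}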


mindspore/lite/src/runtime/kernel/npu/npu_kernel.h (+4, -0)

@@ -58,6 +58,10 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive->Type()));
     return nullptr;
   }
+  if (inputs[0]->shape().size() > 4) {
+    MS_LOG(ERROR) << "Npu does not support input tensor dims greater than 4";
+    return nullptr;
+  }
 
   auto *kernel = new (std::nothrow) T(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
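
This hoists the rank guard into the shared NPUKernelCreator, so it runs once for every NPU op before a kernel object is built; per-op checks such as the softmax one removed below become redundant. A sketch of the factory-level guard pattern, with hypothetical Tensor and Kernel types standing in for lite::Tensor and kernel::LiteKernel:

#include <new>
#include <vector>

struct Tensor { std::vector<int> shape; };       // stand-in for lite::Tensor
struct Kernel { virtual ~Kernel() = default; };  // stand-in for LiteKernel

// Validate inputs once in the creator: every kernel type T built through it
// inherits the at-most-4-D guarantee, so per-kernel checks can be deleted.
template <typename T>
Kernel *CreateNpuKernel(const std::vector<Tensor *> &inputs) {
  if (!inputs.empty() && inputs[0]->shape.size() > 4) {
    return nullptr;  // NPU does not support inputs with more than 4 dims
  }
  return new (std::nothrow) T();
}

struct SoftmaxKernel : Kernel {};  // needs no rank check of its own

int main() {
  Tensor five_d{{1, 2, 3, 4, 5}};
  std::vector<Tensor *> inputs{&five_d};
  Kernel *k = CreateNpuKernel<SoftmaxKernel>(inputs);  // rejected: nullptr
  delete k;  // deleting nullptr is a no-op
  return k == nullptr ? 0 : 1;
}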


mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc (+0, -4)

@@ -24,10 +24,6 @@ using mindspore::schema::PrimitiveType_SoftMax;
 namespace mindspore::kernel {
 int SoftmaxNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                 OpParameter *opParameter) {
-  if (inputs[0]->shape().size() > 4) {
-    MS_LOG(ERROR) << "Npu softmax only supports tensor'dim less than 4.";
-    return RET_ERROR;
-  }
   return RET_OK;
 }


