From 601697439c16868fdae965b706ccb0cf08151d02 Mon Sep 17 00:00:00 2001 From: yangruoqi713 Date: Tue, 29 Dec 2020 11:24:44 +0800 Subject: [PATCH] [MSLITE][DEVELOP] debug npu model, fix bug of npu --- mindspore/lite/src/lite_kernel.h | 2 +- .../src/runtime/agent/npu/npu_executor.cc | 41 ++++++++++++++----- .../agent/npu/optimizer/npu_fusion_pass.cc | 16 ++++---- .../optimizer/npu_insert_transform_pass.cc | 16 +++----- .../agent/npu/optimizer/npu_pass_utils.cc | 4 +- .../agent/npu/optimizer/npu_pass_utils.h | 4 +- .../runtime/kernel/arm/fp32/transpose_fp32.cc | 17 ++++---- .../runtime/kernel/npu/deconvolution_npu.cc | 6 +-- 8 files changed, 58 insertions(+), 48 deletions(-) diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h index 49fac4febd..8af23f0cc2 100644 --- a/mindspore/lite/src/lite_kernel.h +++ b/mindspore/lite/src/lite_kernel.h @@ -94,7 +94,7 @@ class LiteKernel { virtual int Init() { return mindspore::lite::RET_ERROR; } - OpParameter *op_parameter() { return op_parameter_; } + OpParameter *op_parameter() const { return op_parameter_; } std::string name() const { return this->name_; } diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc index f30be3053c..34effac772 100644 --- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc +++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc @@ -37,23 +37,42 @@ int NPUExecutor::Prepare(const std::vector &kernels) { return RET_OK; } +bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr npu_tensor) { + return tensor->Batch() == npu_tensor->GetTensorDimension().GetNumber() && + tensor->Channel() == npu_tensor->GetTensorDimension().GetChannel() && + tensor->Height() == npu_tensor->GetTensorDimension().GetHeight() && + tensor->Width() == npu_tensor->GetTensorDimension().GetWidth(); +} + int NPUExecutor::Run(const std::vector &in_tensors, const std::vector &out_tensors, const std::vector &out_kernels, const std::vector &kernels, Allocator *allocator, const KernelCallBack &before, const KernelCallBack &after) { hiai::AiContext context; for (int i = 0; i < npu_input_tensors_.size(); ++i) { - void *data = in_tensors[i]->data_c(); - if (data == nullptr) { - MS_LOG(ERROR) << model_name_ << " inputs data is nullptr"; - return RET_ERROR; - } - memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size()); - in_tensors[i]->set_ref_count(in_tensors[i]->ref_count() - 1); - if (in_tensors[i]->ref_count() <= 0) { - auto ret = in_tensors[i]->FreeData(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Free tensor data failed"; + int index = 0; + for (; index < in_tensors.size(); index++) { + if (IsSameShapeTensor(in_tensors[index], npu_input_tensors_[i])) { + void *data = in_tensors[index]->data_c(); + if (data == nullptr) { + MS_LOG(ERROR) << model_name_ << " Inputs data is nullptr"; + return RET_ERROR; + } + + memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[index]->Size()); + in_tensors[index]->set_ref_count(in_tensors[index]->ref_count() - 1); + if (in_tensors[index]->ref_count() <= 0) { + auto ret = in_tensors[index]->FreeData(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Free tensor data failed"; + return RET_ERROR; + } + } + break; + } + if (index == in_tensors.size()) { + MS_LOG(ERROR) << "Can't find corresponding ms lite tensor of " << i << " input tensor for npu executor " + << model_name_; return RET_ERROR; } } diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc index 65d8c6bfb4..fd0748d033 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc @@ -23,15 +23,14 @@ namespace mindspore::lite { bool CheckFusion(kernel::LiteKernel *kernel) { auto pre_flag = std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *in_kernel) { - return NPUPassUtils::IsNchw2Nhwc(const_cast(in_kernel)) && - in_kernel->out_kernels().size() == 1; + return NPUPassUtils::IsNchw2Nhwc(in_kernel) && in_kernel->out_kernels().size() == 1; }); if (!pre_flag) { return false; } auto post_flag = std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *out_kernel) { - return NPUPassUtils::IsNhwc2Nchw(const_cast(out_kernel)); + return NPUPassUtils::IsNhwc2Nchw(out_kernel) && (!out_kernel->out_kernels().empty()); }); return post_flag; } @@ -40,16 +39,16 @@ bool CheckFormatFusion(kernel::LiteKernel *kernel) { if (kernel->out_kernels().empty()) { return false; } - if (NPUPassUtils::IsNhwc2Nchw(const_cast(kernel))) { + if (NPUPassUtils::IsNhwc2Nchw(kernel)) { return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) { - return NPUPassUtils::IsNchw2Nhwc(const_cast(kernel)); + return NPUPassUtils::IsNchw2Nhwc(kernel) && (!kernel->out_kernels().empty()); }); } - if (NPUPassUtils::IsNchw2Nhwc(const_cast(kernel))) { + if (NPUPassUtils::IsNchw2Nhwc(kernel)) { return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) { - return NPUPassUtils::IsNhwc2Nchw(const_cast(kernel)); + return NPUPassUtils::IsNhwc2Nchw(kernel) && (!kernel->out_kernels().empty()); }); } return false; @@ -230,8 +229,7 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) { int NPUFusionPass::Run() { for (size_t i = 0; i < kernels->size(); i++) { auto kernel = (*kernels)[i]; - if (NPUPassUtils::IsNchw2Nhwc(const_cast(kernel)) || - NPUPassUtils::IsNhwc2Nchw(const_cast(kernel))) { + if (NPUPassUtils::IsNchw2Nhwc(kernel) || NPUPassUtils::IsNhwc2Nchw(kernel)) { if (CheckFormatFusion(kernel)) { i--; FormatFusion(kernel); diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc index 1cc6a93dc3..8385a20df8 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc @@ -29,14 +29,10 @@ int GetInsertState(kernel::LiteKernel *kernel) { if (npu_insert_nodes.find(kernel->Type()) == npu_insert_nodes.end()) { return InsertNone; } - auto pre_flag = - std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *kernel) { - return NPUPassUtils::IsNchw2Nhwc(const_cast(kernel)); - }); - auto post_flag = - std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) { - return NPUPassUtils::IsNhwc2Nchw(const_cast(kernel)); - }); + auto pre_flag = std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), + [](const kernel::LiteKernel *kernel) { return NPUPassUtils::IsNchw2Nhwc(kernel); }); + auto post_flag = std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), + [](const kernel::LiteKernel *kernel) { return NPUPassUtils::IsNhwc2Nchw(kernel); }); if (pre_flag && !post_flag) { return PostInsert; } @@ -50,7 +46,7 @@ int NPUInsertTransformPass::InsertPreNode(const InnerContext *context, kernel::L std::vector *trans_kernels, std::vector *all_tensors) { for (auto in_kernel : kernel->in_kernels()) { - if (NPUPassUtils::IsNchw2Nhwc(const_cast(in_kernel))) { + if (NPUPassUtils::IsNchw2Nhwc(in_kernel)) { continue; } auto nhwc_shape = in_kernel->out_tensors()[0]->shape(); @@ -88,7 +84,7 @@ int NPUInsertTransformPass::InsertPostNode(const InnerContext *context, kernel:: std::vector *trans_kernels, std::vector *all_tensors) { for (auto out_kernel : kernel->out_kernels()) { - if (NPUPassUtils::IsNhwc2Nchw(const_cast(out_kernel))) { + if (NPUPassUtils::IsNhwc2Nchw(out_kernel)) { continue; } auto nhwc_shape = kernel->out_tensors()[0]->shape(); diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc index ac0d2d686f..e93405758c 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc @@ -192,7 +192,7 @@ void NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, k post_kernel->set_in_tensors({post_in_tensors}); } -bool NPUPassUtils::IsNhwc2Nchw(kernel::LiteKernel *kernel) { +bool NPUPassUtils::IsNhwc2Nchw(const kernel::LiteKernel *kernel) { if (kernel->Type() != schema::PrimitiveType_Transpose) { return false; } @@ -209,7 +209,7 @@ bool NPUPassUtils::IsNhwc2Nchw(kernel::LiteKernel *kernel) { return false; } -bool NPUPassUtils::IsNchw2Nhwc(kernel::LiteKernel *kernel) { +bool NPUPassUtils::IsNchw2Nhwc(const kernel::LiteKernel *kernel) { if (kernel->Type() != schema::PrimitiveType_Transpose) { return false; } diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h index d7bdedf0e8..b6601eca4a 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h @@ -47,9 +47,9 @@ class NPUPassUtils { static void UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel); - static bool IsNhwc2Nchw(kernel::LiteKernel *kernel); + static bool IsNhwc2Nchw(const kernel::LiteKernel *kernel); - static bool IsNchw2Nhwc(kernel::LiteKernel *kernel); + static bool IsNchw2Nhwc(const kernel::LiteKernel *kernel); private: static PrimitiveC *CreateTransposePrimitive(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc index 643a4cca92..d8b4a3f63f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc @@ -93,25 +93,26 @@ int TransposeCPUKernel::Run() { memcpy(out_data_, in_data_, in_tensor->ElementsNum() * sizeof(float)); return RET_OK; } + auto out_shape = out_tensor->shape(); if (in_tensor->shape().size() == 4 && param->perm_[0] == 0 && param->perm_[1] == 2 && param->perm_[2] == 3 && param->perm_[3] == 1) { if (in_tensor->data_type() == kNumberTypeFloat32) { - PackNCHWToNHWCFp32(in_tensor->MutableData(), out_tensor->MutableData(), out_tensor->Batch(), - out_tensor->Height() * out_tensor->Width(), out_tensor->Channel()); + PackNCHWToNHWCFp32(in_tensor->MutableData(), out_tensor->MutableData(), out_shape[0], out_shape[1] * out_shape[2], + out_shape[3]); } else if (in_tensor->data_type() == kNumberTypeInt8) { - PackNCHWToNHWCInt8(in_tensor->MutableData(), out_tensor->MutableData(), out_tensor->Batch(), - out_tensor->Height() * out_tensor->Width(), out_tensor->Channel()); + PackNCHWToNHWCInt8(in_tensor->MutableData(), out_tensor->MutableData(), out_shape[0], out_shape[1] * out_shape[2], + out_shape[3]); } return RET_OK; } if (in_tensor->shape().size() == 4 && param->perm_[0] == 0 && param->perm_[1] == 3 && param->perm_[2] == 1 && param->perm_[3] == 2) { if (in_tensor->data_type() == kNumberTypeFloat32) { - PackNHWCToNCHWFp32(in_tensor->MutableData(), out_tensor->MutableData(), out_tensor->Batch(), - out_tensor->Height() * out_tensor->Width(), out_tensor->Channel()); + PackNHWCToNCHWFp32(in_tensor->MutableData(), out_tensor->MutableData(), out_shape[0], out_shape[2] * out_shape[3], + out_shape[1]); } else if (in_tensor->data_type() == kNumberTypeInt8) { - PackNHWCToNCHWInt8(in_tensor->MutableData(), out_tensor->MutableData(), out_tensor->Batch(), - out_tensor->Height() * out_tensor->Width(), out_tensor->Channel()); + PackNHWCToNCHWInt8(in_tensor->MutableData(), out_tensor->MutableData(), out_shape[0], out_shape[2] * out_shape[3], + out_shape[1]); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/npu/deconvolution_npu.cc b/mindspore/lite/src/runtime/kernel/npu/deconvolution_npu.cc index ac15301345..524732ff54 100644 --- a/mindspore/lite/src/runtime/kernel/npu/deconvolution_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/deconvolution_npu.cc @@ -24,17 +24,13 @@ using mindspore::schema::PrimitiveType_DeConv2D; namespace mindspore::kernel { int DeconvolutionNPUKernel::IsSupport(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter) { - if (conv_param_->group_ != 1) { - MS_LOG(WARNING) << "Only support group equals 1 for npu deconvolution op"; - return RET_ERROR; - } return RET_OK; } int DeconvolutionNPUKernel::SetConvParam() { deconv_->set_attr_strides(ge::AttrValue::LIST_INT({conv_param_->stride_h_, conv_param_->stride_w_})); deconv_->set_attr_dilations(ge::AttrValue::LIST_INT({conv_param_->dilation_h_, conv_param_->dilation_w_})); - deconv_->set_attr_groups(conv_param_->group_); + deconv_->set_attr_groups(1); if (conv_param_->pad_mode_ == Pad_Same) { deconv_->set_attr_pad_mode(ge::AttrValue::STR{"SAME"});