From: @yangruoqi713
Reviewed-by: @zhang_xue_tong, @hangangqiang
Signed-off-by: @zhang_xue_tong
tags/v1.2.0-rc1
@@ -94,7 +94,7 @@ class LiteKernel {
   virtual int Init() { return mindspore::lite::RET_ERROR; }
-  OpParameter *op_parameter() { return op_parameter_; }
+  OpParameter *op_parameter() const { return op_parameter_; }
   std::string name() const { return this->name_; }
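Note on the hunk above: marking op_parameter() const lets read-only code query the parameter through a const LiteKernel pointer without const_cast. A minimal sketch, assuming an OpParameter with a type_ field (the helper below is hypothetical, not part of the patch):

    // Hypothetical read-only helper; compiles against a const kernel only once the
    // getter above is const-qualified.
    int GetOpType(const mindspore::kernel::LiteKernel *kernel) {
      const OpParameter *param = kernel->op_parameter();
      return param != nullptr ? param->type_ : -1;
    }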
@@ -37,23 +37,42 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
   return RET_OK;
 }
+bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
+  return tensor->Batch() == npu_tensor->GetTensorDimension().GetNumber() &&
+         tensor->Channel() == npu_tensor->GetTensorDimension().GetChannel() &&
+         tensor->Height() == npu_tensor->GetTensorDimension().GetHeight() &&
+         tensor->Width() == npu_tensor->GetTensorDimension().GetWidth();
+}
 int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                      const std::vector<kernel::LiteKernel *> &out_kernels,
                      const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
                      const KernelCallBack &before, const KernelCallBack &after) {
   hiai::AiContext context;
   for (int i = 0; i < npu_input_tensors_.size(); ++i) {
-    void *data = in_tensors[i]->data_c();
-    if (data == nullptr) {
-      MS_LOG(ERROR) << model_name_ << " inputs data is nullptr";
-      return RET_ERROR;
-    }
-    memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size());
-    in_tensors[i]->set_ref_count(in_tensors[i]->ref_count() - 1);
-    if (in_tensors[i]->ref_count() <= 0) {
-      auto ret = in_tensors[i]->FreeData();
-      if (ret != RET_OK) {
-        MS_LOG(ERROR) << "Free tensor data failed";
+    int index = 0;
+    for (; index < in_tensors.size(); index++) {
+      if (IsSameShapeTensor(in_tensors[index], npu_input_tensors_[i])) {
+        void *data = in_tensors[index]->data_c();
+        if (data == nullptr) {
+          MS_LOG(ERROR) << model_name_ << " Inputs data is nullptr";
+          return RET_ERROR;
+        }
+        memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[index]->Size());
+        in_tensors[index]->set_ref_count(in_tensors[index]->ref_count() - 1);
+        if (in_tensors[index]->ref_count() <= 0) {
+          auto ret = in_tensors[index]->FreeData();
+          if (ret != RET_OK) {
+            MS_LOG(ERROR) << "Free tensor data failed";
+            return RET_ERROR;
+          }
+        }
+        break;
+      }
+    }
+    if (index == in_tensors.size()) {
+      MS_LOG(ERROR) << "Can't find corresponding ms lite tensor of " << i << " input tensor for npu executor "
+                    << model_name_;
       return RET_ERROR;
     }
   }
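Note on the hunk above: Run() no longer assumes the i-th MS Lite input lines up with the i-th NPU tensor; it scans in_tensors for the first tensor whose N/C/H/W match the hiai tensor dimension and copies that one. A minimal sketch of the lookup, assuming the IsSameShapeTensor helper introduced above (FindMatchingInput is a hypothetical name, not part of the patch):

    int FindMatchingInput(const std::vector<Tensor *> &in_tensors,
                          const std::shared_ptr<hiai::AiTensor> &npu_tensor) {
      for (size_t index = 0; index < in_tensors.size(); ++index) {
        if (IsSameShapeTensor(in_tensors[index], npu_tensor)) {
          return static_cast<int>(index);  // first input whose N/C/H/W match this NPU tensor
        }
      }
      return -1;  // no match: Run() logs an error and returns RET_ERROR
    }

If two inputs happen to share the same dimensions, the first match wins, so this relies on the model's input shapes being distinct.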
@@ -23,15 +23,14 @@ namespace mindspore::lite {
 bool CheckFusion(kernel::LiteKernel *kernel) {
   auto pre_flag =
     std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *in_kernel) {
-      return NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(in_kernel)) &&
-             in_kernel->out_kernels().size() == 1;
+      return NPUPassUtils::IsNchw2Nhwc(in_kernel) && in_kernel->out_kernels().size() == 1;
     });
   if (!pre_flag) {
     return false;
   }
   auto post_flag =
     std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *out_kernel) {
-      return NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(out_kernel));
+      return NPUPassUtils::IsNhwc2Nchw(out_kernel) && (!out_kernel->out_kernels().empty());
     });
   return post_flag;
 }
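Note on the hunk above: the const_cast calls can go because IsNchw2Nhwc/IsNhwc2Nchw now take const pointers (see the npu_pass_utils hunks below), and the post check additionally requires each consumer transpose to have consumers of its own, so a transpose that produces a graph output is not fused away. A condensed sketch of the two predicates under those assumptions (the helper names are hypothetical):

    bool IsFusablePre(const kernel::LiteKernel *in_kernel) {
      // producer must be an NCHW->NHWC transpose that feeds only this kernel
      return NPUPassUtils::IsNchw2Nhwc(in_kernel) && in_kernel->out_kernels().size() == 1;
    }

    bool IsFusablePost(const kernel::LiteKernel *out_kernel) {
      // consumer must be an NHWC->NCHW transpose that is itself consumed (not a graph output)
      return NPUPassUtils::IsNhwc2Nchw(out_kernel) && !out_kernel->out_kernels().empty();
    }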
@@ -40,16 +39,16 @@ bool CheckFormatFusion(kernel::LiteKernel *kernel) {
   if (kernel->out_kernels().empty()) {
     return false;
   }
-  if (NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel))) {
+  if (NPUPassUtils::IsNhwc2Nchw(kernel)) {
     return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
                        [](const kernel::LiteKernel *kernel) {
-                         return NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel));
+                         return NPUPassUtils::IsNchw2Nhwc(kernel) && (!kernel->out_kernels().empty());
                        });
   }
-  if (NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel))) {
+  if (NPUPassUtils::IsNchw2Nhwc(kernel)) {
     return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
                        [](const kernel::LiteKernel *kernel) {
-                         return NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel));
+                         return NPUPassUtils::IsNhwc2Nchw(kernel) && (!kernel->out_kernels().empty());
                        });
   }
   return false;
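Note on the hunk above: CheckFormatFusion now also requires each following transpose to have downstream consumers before a back-to-back NHWC<->NCHW pair is collapsed. A minimal sketch of the pair test, assuming the const-qualified predicates (IsInverseTransposePair is a hypothetical helper):

    bool IsInverseTransposePair(const kernel::LiteKernel *first, const kernel::LiteKernel *second) {
      // NHWC->NCHW followed by NCHW->NHWC (or the reverse) cancels out and can be removed,
      // provided the second transpose still has consumers to re-link.
      return ((NPUPassUtils::IsNhwc2Nchw(first) && NPUPassUtils::IsNchw2Nhwc(second)) ||
              (NPUPassUtils::IsNchw2Nhwc(first) && NPUPassUtils::IsNhwc2Nchw(second))) &&
             !second->out_kernels().empty();
    }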
@@ -230,8 +229,7 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
 int NPUFusionPass::Run() {
   for (size_t i = 0; i < kernels->size(); i++) {
     auto kernel = (*kernels)[i];
-    if (NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel)) ||
-        NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel))) {
+    if (NPUPassUtils::IsNchw2Nhwc(kernel) || NPUPassUtils::IsNhwc2Nchw(kernel)) {
       if (CheckFormatFusion(kernel)) {
         i--;
         FormatFusion(kernel);
@@ -29,14 +29,10 @@ int GetInsertState(kernel::LiteKernel *kernel) {
   if (npu_insert_nodes.find(kernel->Type()) == npu_insert_nodes.end()) {
     return InsertNone;
   }
-  auto pre_flag =
-    std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *kernel) {
-      return NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(kernel));
-    });
-  auto post_flag =
-    std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
-      return NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(kernel));
-    });
+  auto pre_flag = std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(),
+                              [](const kernel::LiteKernel *kernel) { return NPUPassUtils::IsNchw2Nhwc(kernel); });
+  auto post_flag = std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
+                               [](const kernel::LiteKernel *kernel) { return NPUPassUtils::IsNhwc2Nchw(kernel); });
   if (pre_flag && !post_flag) {
     return PostInsert;
   }
@@ -50,7 +46,7 @@ int NPUInsertTransformPass::InsertPreNode(const InnerContext *context, kernel::L
                                           std::vector<kernel::LiteKernel *> *trans_kernels,
                                           std::vector<Tensor *> *all_tensors) {
   for (auto in_kernel : kernel->in_kernels()) {
-    if (NPUPassUtils::IsNchw2Nhwc(const_cast<kernel::LiteKernel *>(in_kernel))) {
+    if (NPUPassUtils::IsNchw2Nhwc(in_kernel)) {
       continue;
     }
     auto nhwc_shape = in_kernel->out_tensors()[0]->shape();
@@ -88,7 +84,7 @@ int NPUInsertTransformPass::InsertPostNode(const InnerContext *context, kernel::
                                            std::vector<kernel::LiteKernel *> *trans_kernels,
                                            std::vector<Tensor *> *all_tensors) {
   for (auto out_kernel : kernel->out_kernels()) {
-    if (NPUPassUtils::IsNhwc2Nchw(const_cast<kernel::LiteKernel *>(out_kernel))) {
+    if (NPUPassUtils::IsNhwc2Nchw(out_kernel)) {
       continue;
     }
     auto nhwc_shape = kernel->out_tensors()[0]->shape();
@@ -192,7 +192,7 @@ void NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, k
   post_kernel->set_in_tensors({post_in_tensors});
 }
-bool NPUPassUtils::IsNhwc2Nchw(kernel::LiteKernel *kernel) {
+bool NPUPassUtils::IsNhwc2Nchw(const kernel::LiteKernel *kernel) {
   if (kernel->Type() != schema::PrimitiveType_Transpose) {
     return false;
   }
@@ -209,7 +209,7 @@ bool NPUPassUtils::IsNhwc2Nchw(kernel::LiteKernel *kernel) {
   return false;
 }
-bool NPUPassUtils::IsNchw2Nhwc(kernel::LiteKernel *kernel) {
+bool NPUPassUtils::IsNchw2Nhwc(const kernel::LiteKernel *kernel) {
   if (kernel->Type() != schema::PrimitiveType_Transpose) {
     return false;
   }
@@ -47,9 +47,9 @@ class NPUPassUtils {
   static void UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                               kernel::LiteKernel *post_kernel);
-  static bool IsNhwc2Nchw(kernel::LiteKernel *kernel);
+  static bool IsNhwc2Nchw(const kernel::LiteKernel *kernel);
-  static bool IsNchw2Nhwc(kernel::LiteKernel *kernel);
+  static bool IsNchw2Nhwc(const kernel::LiteKernel *kernel);
  private:
   static PrimitiveC *CreateTransposePrimitive();
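Note on the hunk above: the const-qualified declarations are what let every call site in the fusion and insert-transform passes drop const_cast when iterating over const kernel pointers. A minimal sketch of a call site that now compiles cleanly (AllInputsAreNchw2Nhwc is a hypothetical helper, not part of the patch):

    #include <algorithm>
    #include <vector>

    bool AllInputsAreNchw2Nhwc(const std::vector<kernel::LiteKernel *> &in_kernels) {
      return std::all_of(in_kernels.begin(), in_kernels.end(),
                         [](const kernel::LiteKernel *k) { return NPUPassUtils::IsNchw2Nhwc(k); });
    }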
@@ -93,25 +93,26 @@ int TransposeCPUKernel::Run() {
     memcpy(out_data_, in_data_, in_tensor->ElementsNum() * sizeof(float));
     return RET_OK;
   }
+  auto out_shape = out_tensor->shape();
   if (in_tensor->shape().size() == 4 && param->perm_[0] == 0 && param->perm_[1] == 2 && param->perm_[2] == 3 &&
       param->perm_[3] == 1) {
     if (in_tensor->data_type() == kNumberTypeFloat32) {
-      PackNCHWToNHWCFp32(in_tensor->MutableData(), out_tensor->MutableData(), out_tensor->Batch(),
-                         out_tensor->Height() * out_tensor->Width(), out_tensor->Channel());
+      PackNCHWToNHWCFp32(in_tensor->MutableData(), out_tensor->MutableData(), out_shape[0],
+                         out_shape[1] * out_shape[2], out_shape[3]);
     } else if (in_tensor->data_type() == kNumberTypeInt8) {
-      PackNCHWToNHWCInt8(in_tensor->MutableData(), out_tensor->MutableData(), out_tensor->Batch(),
-                         out_tensor->Height() * out_tensor->Width(), out_tensor->Channel());
+      PackNCHWToNHWCInt8(in_tensor->MutableData(), out_tensor->MutableData(), out_shape[0],
+                         out_shape[1] * out_shape[2], out_shape[3]);
     }
     return RET_OK;
   }
   if (in_tensor->shape().size() == 4 && param->perm_[0] == 0 && param->perm_[1] == 3 && param->perm_[2] == 1 &&
       param->perm_[3] == 2) {
     if (in_tensor->data_type() == kNumberTypeFloat32) {
-      PackNHWCToNCHWFp32(in_tensor->MutableData(), out_tensor->MutableData(), out_tensor->Batch(),
-                         out_tensor->Height() * out_tensor->Width(), out_tensor->Channel());
+      PackNHWCToNCHWFp32(in_tensor->MutableData(), out_tensor->MutableData(), out_shape[0],
+                         out_shape[2] * out_shape[3], out_shape[1]);
     } else if (in_tensor->data_type() == kNumberTypeInt8) {
-      PackNHWCToNCHWInt8(in_tensor->MutableData(), out_tensor->MutableData(), out_tensor->Batch(),
-                         out_tensor->Height() * out_tensor->Width(), out_tensor->Channel());
+      PackNHWCToNCHWInt8(in_tensor->MutableData(), out_tensor->MutableData(), out_shape[0],
+                         out_shape[2] * out_shape[3], out_shape[1]);
     }
     return RET_OK;
   }
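Note on the hunk above: Batch()/Height()/Width()/Channel() interpret a tensor as NHWC, but for the 0,3,1,2 permutation the output tensor is NCHW, so indexing out_shape directly gives the pack routines the correct batch/plane/channel arguments. A sketch of the mapping under that assumption (GetPackDims is a hypothetical helper, not part of the patch):

    #include <vector>

    struct PackDims {
      int batch;
      int plane;    // spatial elements per channel (H * W)
      int channel;
    };

    // perm {0,2,3,1}: NCHW -> NHWC, output shape is {N, H, W, C}
    // perm {0,3,1,2}: NHWC -> NCHW, output shape is {N, C, H, W}
    PackDims GetPackDims(const std::vector<int> &out_shape, bool nchw_to_nhwc) {
      if (nchw_to_nhwc) {
        return {out_shape[0], out_shape[1] * out_shape[2], out_shape[3]};
      }
      return {out_shape[0], out_shape[2] * out_shape[3], out_shape[1]};
    }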
@@ -24,17 +24,13 @@ using mindspore::schema::PrimitiveType_DeConv2D;
 namespace mindspore::kernel {
 int DeconvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                       const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
-  if (conv_param_->group_ != 1) {
-    MS_LOG(WARNING) << "Only support group equals 1 for npu deconvolution op";
-    return RET_ERROR;
-  }
   return RET_OK;
 }
 int DeconvolutionNPUKernel::SetConvParam() {
   deconv_->set_attr_strides(ge::AttrValue::LIST_INT({conv_param_->stride_h_, conv_param_->stride_w_}));
   deconv_->set_attr_dilations(ge::AttrValue::LIST_INT({conv_param_->dilation_h_, conv_param_->dilation_w_}));
-  deconv_->set_attr_groups(conv_param_->group_);
+  deconv_->set_attr_groups(1);
   if (conv_param_->pad_mode_ == Pad_Same) {
     deconv_->set_attr_pad_mode(ge::AttrValue::STR{"SAME"});