diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc index 13f04e8091..6b50fe5743 100644 --- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc +++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc @@ -17,7 +17,7 @@ #include "src/runtime/agent/npu/npu_executor.h" #include "include/errorcode.h" #include "src/runtime/agent/npu/npu_manager.h" - +#include "nnacl/pack.h" namespace mindspore::lite { int NPUExecutor::Prepare(const std::vector &kernels) { this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(); @@ -32,12 +32,23 @@ int NPUExecutor::Prepare(const std::vector &kernels) { return RET_OK; } -int NPUExecutor::Run(std::vector &in_tensors, std::vector &out_tensors, - std::vector &kernels, Allocator *allocator, const KernelCallBack &before, +int NPUExecutor::Run(const std::vector &in_tensors, const std::vector &out_tensors, + const std::vector &kernels, const std::vector &inputs_nhwc2nchw, + const std::vector &outputs_nchw2nhwc, Allocator *allocator, const KernelCallBack &before, const KernelCallBack &after) { hiai::AiContext context; for (int i = 0; i < npu_input_tensors_.size(); ++i) { - memcpy(npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->data_c(), in_tensors[i]->Size()); + void *data = in_tensors[i]->data_c(); + if (data == nullptr) { + MS_LOG(ERROR) << model_name_ << " inputs data is nullptr"; + return RET_ERROR; + } + if (inputs_nhwc2nchw[i]) { + PackNHWCToNCHWFp32(data, npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->Batch(), + in_tensors[i]->Width() * in_tensors[i]->Height(), in_tensors[i]->Channel()); + } else { + memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size()); + } } context.AddPara("model_name", model_name_); if (this->client_ == nullptr) { @@ -52,10 +63,19 @@ int NPUExecutor::Run(std::vector &in_tensors, std::vector &o } for (int i = 0; i < npu_output_tensors_.size(); ++i) { - memcpy(out_tensors[i]->MutableData(), npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize()); + void *data = out_tensors[i]->MutableData(); + if (data == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + if (outputs_nchw2nhwc[i]) { + PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(), + out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel()); + } else { + memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize()); + } out_tensors[i]->ResetRefCount(); } - return RET_OK; } diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.h b/mindspore/lite/src/runtime/agent/npu/npu_executor.h index 782ec4779b..d3645f3143 100644 --- a/mindspore/lite/src/runtime/agent/npu/npu_executor.h +++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.h @@ -31,9 +31,10 @@ class NPUExecutor : public Executor { ~NPUExecutor() override = default; int Prepare(const std::vector &kernels) override; - int Run(std::vector &in_tensors, std::vector &out_tensors, - std::vector &kernels, Allocator *allocator = nullptr, - const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr) override; + int Run(const std::vector &in_tensors, const std::vector &out_tensors, + const std::vector &kernels, const std::vector &inputs_nhwc2nchw, + const std::vector &outputs_nchw2nhwc, Allocator *allocator = nullptr, + const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr); private: int GetIOTensorVec(); diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc index f300da5f85..2c1f06367a 100644 --- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc +++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc @@ -34,6 +34,10 @@ namespace mindspore::kernel { using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; +std::set trans_nodes = {schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, + schema::PrimitiveType_DepthwiseConv2D, + schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_Resize}; + domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() { ge::Graph graph("NPUGraph"); @@ -70,12 +74,12 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() { return om_model_buff; } -int SubGraphNpuKernel::Run() { return this->executor_->Run(in_tensors_, out_tensors_, nodes_, nullptr); } +int SubGraphNpuKernel::Run() { + return reinterpret_cast(this->executor_) + ->Run(in_tensors_, out_tensors_, nodes_, inputs_nhwc2nchw_, outputs_nchw2nhwc_); +} int SubGraphNpuKernel::BuildNPUInputOp() { - std::set trans_nodes = {schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, - schema::PrimitiveType_DepthwiseConv2D, - schema::PrimitiveType_DeDepthwiseConv2D}; int count = 0; subgraph_input_op_.clear(); for (auto node : this->nodes_) { @@ -94,8 +98,10 @@ int SubGraphNpuKernel::BuildNPUInputOp() { ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({shape[0], shape[3], shape[1], shape[2]}), ge::FORMAT_NCHW, lite::ConverterToNPUDataType(in_tensor->data_type())); data->update_input_desc_x(tensor_desc); + inputs_nhwc2nchw_.push_back(true); } else { data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name); + inputs_nhwc2nchw_.push_back(false); } subgraph_input_op_.push_back(*data); node_input_op.push_back(data); @@ -156,6 +162,11 @@ std::vector SubGraphNpuKernel::GetNPUNodes(const vector(nodes[i])->GetNPUOp()); + if (trans_nodes.find(schema::PrimitiveType(nodes[i]->GetPrimitive()->Type())) != trans_nodes.end()) { + outputs_nchw2nhwc_.push_back(true); + } else { + outputs_nchw2nhwc_.push_back(false); + } } return ops; } diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h index 2ed220a561..031bf232b1 100644 --- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h +++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h @@ -69,6 +69,10 @@ class SubGraphNpuKernel : public SubGraphKernel { std::string GetOMModelName(); private: + std::vector inputs_nhwc2nchw_; + + std::vector outputs_nchw2nhwc_; + domi::ModelBufferData *model_buffer_data_; std::vector subgraph_input_op_; diff --git a/mindspore/lite/src/runtime/kernel/npu/eltwise_npu.cc b/mindspore/lite/src/runtime/kernel/npu/eltwise_npu.cc index 8316c730a7..c5c82793f3 100644 --- a/mindspore/lite/src/runtime/kernel/npu/eltwise_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/eltwise_npu.cc @@ -32,7 +32,6 @@ int EltwiseNPUKernel::IsSupport(const std::vector &inputs, const int EltwiseNPUKernel::SetNPUInputs(const std::vector &inputs, const std::vector &outputs, const std::vector &npu_inputs) { - MS_LOG(ERROR) << name_; op_ = new (std::nothrow) hiai::op::Eltwise(name_); if (op_ == nullptr) { MS_LOG(ERROR) << name_ << " op is nullptr"; diff --git a/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc b/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc index ebd3695f1b..17bcb35fb8 100644 --- a/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/resize_npu.cc @@ -31,16 +31,22 @@ int ResizeNPUKernel::IsSupport(const std::vector &inputs, const MS_LOG(WARNING) << "Unsupported resize method type:" << method_; return RET_ERROR; } - return RET_ERROR; + return RET_OK; } int ResizeNPUKernel::SetNPUInputs(const std::vector &inputs, const std::vector &outputs, const std::vector &npu_inputs) { + auto ret = SetPreTranspose(npu_inputs[0]); + if (ret != RET_OK) { + MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; + return RET_ERROR; + } + ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32); ge::TensorPtr sizeTensor = std::make_shared(sizeTensorDesc); vector dataValue = {static_cast(new_height_), static_cast(new_width_)}; sizeTensor->SetData(reinterpret_cast(dataValue.data()), 2 * sizeof(int32_t)); - auto out_size = new (std::nothrow) hiai::op::Const(name_ + "size"); + auto out_size = new (std::nothrow) hiai::op::Const(name_ + "_size"); out_size->set_attr_value(sizeTensor); if (method_ == schema::ResizeMethod_LINEAR) { auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_); @@ -49,7 +55,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector &inputs, con return RET_ERROR; } op->set_attr_align_corners(align_corners_); - op->set_input_x(*npu_inputs[0]); + op->set_input_x(*pre_trans_); op->set_input_size(*out_size); op->set_attr_half_pixel_centers(preserve_aspect_ratio_); op_ = op; @@ -60,14 +66,21 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector &inputs, con return RET_ERROR; } op->set_attr_align_corners(align_corners_); - op->set_input_x(*npu_inputs[0]); + op->set_input_x(*pre_trans_); op->set_input_size(*out_size); op_ = op; } + + ret = SetPostTranspose(op_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed."; + return RET_ERROR; + } + return RET_OK; } -ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->op_; } +ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->post_trans_; } ResizeNPUKernel::~ResizeNPUKernel() { if (op_ != nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/npu/resize_npu.h b/mindspore/lite/src/runtime/kernel/npu/resize_npu.h index 61188dff3d..5077ac58e4 100644 --- a/mindspore/lite/src/runtime/kernel/npu/resize_npu.h +++ b/mindspore/lite/src/runtime/kernel/npu/resize_npu.h @@ -22,13 +22,14 @@ #include "nnacl/arithmetic_common.h" #include "src/runtime/kernel/npu/npu_kernel.h" #include "include/graph/op/all_ops.h" +#include "src/runtime/kernel/npu/transpose_base_npu.h" namespace mindspore::kernel { -class ResizeNPUKernel : public NPUKernel { +class ResizeNPUKernel : public TransposeBaseNPUKernel { public: ResizeNPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : NPUKernel(parameter, inputs, outputs, ctx, primitive) { + : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) { auto resize_parameter = reinterpret_cast(parameter); method_ = resize_parameter->method_; new_height_ = resize_parameter->new_height_;