| @@ -17,7 +17,7 @@ | |||||
| #include "src/runtime/agent/npu/npu_executor.h" | #include "src/runtime/agent/npu/npu_executor.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/runtime/agent/npu/npu_manager.h" | #include "src/runtime/agent/npu/npu_manager.h" | ||||
| #include "nnacl/pack.h" | |||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { | int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { | ||||
| this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(); | this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(); | ||||
| @@ -32,12 +32,23 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int NPUExecutor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors, | |||||
| std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator, const KernelCallBack &before, | |||||
| int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||||
| const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw, | |||||
| const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator, const KernelCallBack &before, | |||||
| const KernelCallBack &after) { | const KernelCallBack &after) { | ||||
| hiai::AiContext context; | hiai::AiContext context; | ||||
| for (int i = 0; i < npu_input_tensors_.size(); ++i) { | for (int i = 0; i < npu_input_tensors_.size(); ++i) { | ||||
| memcpy(npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->data_c(), in_tensors[i]->Size()); | |||||
| void *data = in_tensors[i]->data_c(); | |||||
| if (data == nullptr) { | |||||
| MS_LOG(ERROR) << model_name_ << " inputs data is nullptr"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (inputs_nhwc2nchw[i]) { | |||||
| PackNHWCToNCHWFp32(data, npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->Batch(), | |||||
| in_tensors[i]->Width() * in_tensors[i]->Height(), in_tensors[i]->Channel()); | |||||
| } else { | |||||
| memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size()); | |||||
| } | |||||
| } | } | ||||
| context.AddPara("model_name", model_name_); | context.AddPara("model_name", model_name_); | ||||
| if (this->client_ == nullptr) { | if (this->client_ == nullptr) { | ||||
| @@ -52,10 +63,19 @@ int NPUExecutor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &o | |||||
| } | } | ||||
| for (int i = 0; i < npu_output_tensors_.size(); ++i) { | for (int i = 0; i < npu_output_tensors_.size(); ++i) { | ||||
| memcpy(out_tensors[i]->MutableData(), npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize()); | |||||
| void *data = out_tensors[i]->MutableData(); | |||||
| if (data == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (outputs_nchw2nhwc[i]) { | |||||
| PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(), | |||||
| out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel()); | |||||
| } else { | |||||
| memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize()); | |||||
| } | |||||
| out_tensors[i]->ResetRefCount(); | out_tensors[i]->ResetRefCount(); | ||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -31,9 +31,10 @@ class NPUExecutor : public Executor { | |||||
| ~NPUExecutor() override = default; | ~NPUExecutor() override = default; | ||||
| int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override; | int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override; | ||||
| int Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors, | |||||
| std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr, | |||||
| const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr) override; | |||||
| int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||||
| const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw, | |||||
| const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator = nullptr, | |||||
| const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr); | |||||
| private: | private: | ||||
| int GetIOTensorVec(); | int GetIOTensorVec(); | ||||
| @@ -34,6 +34,10 @@ namespace mindspore::kernel { | |||||
| using mindspore::lite::RET_ERROR; | using mindspore::lite::RET_ERROR; | ||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| std::set<schema::PrimitiveType> trans_nodes = {schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, | |||||
| schema::PrimitiveType_DepthwiseConv2D, | |||||
| schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_Resize}; | |||||
| domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() { | domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() { | ||||
| ge::Graph graph("NPUGraph"); | ge::Graph graph("NPUGraph"); | ||||
| @@ -70,12 +74,12 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() { | |||||
| return om_model_buff; | return om_model_buff; | ||||
| } | } | ||||
| int SubGraphNpuKernel::Run() { return this->executor_->Run(in_tensors_, out_tensors_, nodes_, nullptr); } | |||||
| int SubGraphNpuKernel::Run() { | |||||
| return reinterpret_cast<lite::NPUExecutor *>(this->executor_) | |||||
| ->Run(in_tensors_, out_tensors_, nodes_, inputs_nhwc2nchw_, outputs_nchw2nhwc_); | |||||
| } | |||||
| int SubGraphNpuKernel::BuildNPUInputOp() { | int SubGraphNpuKernel::BuildNPUInputOp() { | ||||
| std::set<schema::PrimitiveType> trans_nodes = {schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, | |||||
| schema::PrimitiveType_DepthwiseConv2D, | |||||
| schema::PrimitiveType_DeDepthwiseConv2D}; | |||||
| int count = 0; | int count = 0; | ||||
| subgraph_input_op_.clear(); | subgraph_input_op_.clear(); | ||||
| for (auto node : this->nodes_) { | for (auto node : this->nodes_) { | ||||
| @@ -94,8 +98,10 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||||
| ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({shape[0], shape[3], shape[1], shape[2]}), | ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({shape[0], shape[3], shape[1], shape[2]}), | ||||
| ge::FORMAT_NCHW, lite::ConverterToNPUDataType(in_tensor->data_type())); | ge::FORMAT_NCHW, lite::ConverterToNPUDataType(in_tensor->data_type())); | ||||
| data->update_input_desc_x(tensor_desc); | data->update_input_desc_x(tensor_desc); | ||||
| inputs_nhwc2nchw_.push_back(true); | |||||
| } else { | } else { | ||||
| data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name); | data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name); | ||||
| inputs_nhwc2nchw_.push_back(false); | |||||
| } | } | ||||
| subgraph_input_op_.push_back(*data); | subgraph_input_op_.push_back(*data); | ||||
| node_input_op.push_back(data); | node_input_op.push_back(data); | ||||
| @@ -156,6 +162,11 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::Li | |||||
| ops.reserve(nodes.size()); | ops.reserve(nodes.size()); | ||||
| for (int i = 0; i < nodes.size(); i++) { | for (int i = 0; i < nodes.size(); i++) { | ||||
| ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp()); | ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp()); | ||||
| if (trans_nodes.find(schema::PrimitiveType(nodes[i]->GetPrimitive()->Type())) != trans_nodes.end()) { | |||||
| outputs_nchw2nhwc_.push_back(true); | |||||
| } else { | |||||
| outputs_nchw2nhwc_.push_back(false); | |||||
| } | |||||
| } | } | ||||
| return ops; | return ops; | ||||
| } | } | ||||
| @@ -69,6 +69,10 @@ class SubGraphNpuKernel : public SubGraphKernel { | |||||
| std::string GetOMModelName(); | std::string GetOMModelName(); | ||||
| private: | private: | ||||
| std::vector<bool> inputs_nhwc2nchw_; | |||||
| std::vector<bool> outputs_nchw2nhwc_; | |||||
| domi::ModelBufferData *model_buffer_data_; | domi::ModelBufferData *model_buffer_data_; | ||||
| std::vector<ge::Operator> subgraph_input_op_; | std::vector<ge::Operator> subgraph_input_op_; | ||||
| @@ -32,7 +32,6 @@ int EltwiseNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const | |||||
| int EltwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | int EltwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, | const std::vector<lite::Tensor *> &outputs, | ||||
| const std::vector<ge::Operator *> &npu_inputs) { | const std::vector<ge::Operator *> &npu_inputs) { | ||||
| MS_LOG(ERROR) << name_; | |||||
| op_ = new (std::nothrow) hiai::op::Eltwise(name_); | op_ = new (std::nothrow) hiai::op::Eltwise(name_); | ||||
| if (op_ == nullptr) { | if (op_ == nullptr) { | ||||
| MS_LOG(ERROR) << name_ << " op is nullptr"; | MS_LOG(ERROR) << name_ << " op is nullptr"; | ||||
| @@ -31,16 +31,22 @@ int ResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const | |||||
| MS_LOG(WARNING) << "Unsupported resize method type:" << method_; | MS_LOG(WARNING) << "Unsupported resize method type:" << method_; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| return RET_ERROR; | |||||
| return RET_OK; | |||||
| } | } | ||||
| int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | ||||
| const std::vector<ge::Operator *> &npu_inputs) { | const std::vector<ge::Operator *> &npu_inputs) { | ||||
| auto ret = SetPreTranspose(npu_inputs[0]); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32); | ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32); | ||||
| ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc); | ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc); | ||||
| vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)}; | vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)}; | ||||
| sizeTensor->SetData(reinterpret_cast<uint8_t *>(dataValue.data()), 2 * sizeof(int32_t)); | sizeTensor->SetData(reinterpret_cast<uint8_t *>(dataValue.data()), 2 * sizeof(int32_t)); | ||||
| auto out_size = new (std::nothrow) hiai::op::Const(name_ + "size"); | |||||
| auto out_size = new (std::nothrow) hiai::op::Const(name_ + "_size"); | |||||
| out_size->set_attr_value(sizeTensor); | out_size->set_attr_value(sizeTensor); | ||||
| if (method_ == schema::ResizeMethod_LINEAR) { | if (method_ == schema::ResizeMethod_LINEAR) { | ||||
| auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_); | auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_); | ||||
| @@ -49,7 +55,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| op->set_attr_align_corners(align_corners_); | op->set_attr_align_corners(align_corners_); | ||||
| op->set_input_x(*npu_inputs[0]); | |||||
| op->set_input_x(*pre_trans_); | |||||
| op->set_input_size(*out_size); | op->set_input_size(*out_size); | ||||
| op->set_attr_half_pixel_centers(preserve_aspect_ratio_); | op->set_attr_half_pixel_centers(preserve_aspect_ratio_); | ||||
| op_ = op; | op_ = op; | ||||
| @@ -60,14 +66,21 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| op->set_attr_align_corners(align_corners_); | op->set_attr_align_corners(align_corners_); | ||||
| op->set_input_x(*npu_inputs[0]); | |||||
| op->set_input_x(*pre_trans_); | |||||
| op->set_input_size(*out_size); | op->set_input_size(*out_size); | ||||
| op_ = op; | op_ = op; | ||||
| } | } | ||||
| ret = SetPostTranspose(op_); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->op_; } | |||||
| ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->post_trans_; } | |||||
| ResizeNPUKernel::~ResizeNPUKernel() { | ResizeNPUKernel::~ResizeNPUKernel() { | ||||
| if (op_ != nullptr) { | if (op_ != nullptr) { | ||||
| @@ -22,13 +22,14 @@ | |||||
| #include "nnacl/arithmetic_common.h" | #include "nnacl/arithmetic_common.h" | ||||
| #include "src/runtime/kernel/npu/npu_kernel.h" | #include "src/runtime/kernel/npu/npu_kernel.h" | ||||
| #include "include/graph/op/all_ops.h" | #include "include/graph/op/all_ops.h" | ||||
| #include "src/runtime/kernel/npu/transpose_base_npu.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class ResizeNPUKernel : public NPUKernel { | |||||
| class ResizeNPUKernel : public TransposeBaseNPUKernel { | |||||
| public: | public: | ||||
| ResizeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ResizeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | ||||
| const mindspore::lite::PrimitiveC *primitive) | const mindspore::lite::PrimitiveC *primitive) | ||||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter); | auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter); | ||||
| method_ = resize_parameter->method_; | method_ = resize_parameter->method_; | ||||
| new_height_ = resize_parameter->new_height_; | new_height_ = resize_parameter->new_height_; | ||||