Browse Source

!11217 [MSLITE][DEVELOP] fix bug of npu for some tflite basic models

From: @yangruoqi713
Reviewed-by: 
Signed-off-by:
tags/v1.2.0-rc1
mindspore-ci-bot Gitee 4 years ago
parent
commit
4952f7feeb
11 changed files with 388 additions and 170 deletions
  1. +106
    -60
      mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc
  2. +5
    -4
      mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h
  3. +56
    -51
      mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc
  4. +8
    -6
      mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h
  5. +69
    -41
      mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc
  6. +2
    -4
      mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.h
  7. +30
    -2
      mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc
  8. +7
    -2
      mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.h
  9. +57
    -0
      mindspore/lite/src/runtime/kernel/npu/squeeze_npu.cc
  10. +46
    -0
      mindspore/lite/src/runtime/kernel/npu/squeeze_npu.h
  11. +2
    -0
      mindspore/lite/test/models_npu.cfg

+ 106
- 60
mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc View File

@@ -15,6 +15,7 @@
*/
#include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h"
#include <set>
#include <string>
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"

namespace mindspore::lite {
@@ -42,76 +43,110 @@ int GetInsertState(kernel::LiteKernel *kernel) {
return InsertNone;
}

int NPUInsertTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels,
std::vector<Tensor *> *all_tensors) {
// Inserts a Nhwc2Nchw + Nchw2Nhwc kernel pair on the edge between `kernel` and
// `post_kernel`. Exactly one of the two may be nullptr:
//   - kernel == nullptr      -> post_kernel consumes a graph input.
//   - post_kernel == nullptr -> kernel produces a graph output.
// Newly created kernels are appended to *trans_kernels and their tensors to
// all_tensors_. Returns RET_OK on success, RET_ERROR when allocation fails.
int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteKernel *post_kernel,
                                       std::vector<kernel::LiteKernel *> *trans_kernels) {
  // Kernel and post_kernel can't be nullptr at the same time.
  std::string kernel_name;
  Tensor *in_tensor = nullptr;

  std::vector<kernel::LiteKernel *> out_kernels;
  // If post_kernel equals nullptr, kernel is the output of whole graph.
  if (post_kernel != nullptr) {
    out_kernels.push_back(post_kernel);
    kernel_name = post_kernel->name() + "_pre";
    in_tensor = post_kernel->in_tensors()[0];
  }
  std::vector<kernel::LiteKernel *> in_kernels;
  // If kernel equals nullptr, post_kernel is the input of whole graph.
  if (kernel != nullptr) {
    in_kernels.push_back(kernel);
    kernel_name = kernel->name() + "_post";
    in_tensor = kernel->out_tensors()[0];
  }
  // assumes in_tensor has a 4-D NHWC shape — TODO confirm GetInsertState guarantees this.
  std::vector<int> nhwc_shape = in_tensor->shape();
  std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};

  // Intermediate tensor carrying the NCHW-shaped data between the two trans kernels.
  auto nh2nc_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR);
  if (nh2nc_tensor == nullptr) {
    MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc kernel.";
    return RET_ERROR;
  }
  std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor};
  all_tensors_->push_back(nh2nc_tensors[0]);

  // Output tensor of the second trans kernel (back to the NHWC shape).
  auto nc2nh_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR);
  if (nc2nh_tensor == nullptr) {
    MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw kernel.";
    return RET_ERROR;
  }
  std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor};
  all_tensors_->push_back(nc2nh_tensors[0]);

  // `total` is a pass-level counter that keeps generated kernel names unique.
  auto nh2nc_name = kernel_name + "_nh2nc_" + std::to_string(total++);
  auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel({in_tensor}, nh2nc_tensors, context_, nh2nc_name);
  trans_kernels->push_back(nh2nc_kernel);
  insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());

  auto nc2nh_name = kernel_name + "_nc2nh_" + std::to_string(total++);
  auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context_, nc2nh_name);
  trans_kernels->push_back(nc2nh_kernel);
  insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());

  // Wire the pair into the graph: kernel -> nh2nc -> nc2nh -> post_kernel.
  NPUPassUtils::UpdateKernel(nh2nc_kernel, in_kernels, {nc2nh_kernel}, {in_tensor}, nh2nc_tensors);
  NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, out_kernels, nh2nc_tensors, nc2nh_tensors);
  if (kernel != nullptr) {
    // NOTE(review): post_kernel may be nullptr here (graph-output case); confirm
    // UpdateNH2NCTransNodePreKernel handles a null third argument as intended.
    NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, post_kernel);
  }
  if (post_kernel != nullptr) {
    NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, nc2nh_kernel, post_kernel);
  }
  return RET_OK;
}

int NPUInsertTransformPass::InsertPreNodes(kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels) {
if (kernel->in_kernels().size() != kernel->in_tensors().size()) {
MS_LOG(DEBUG) << "The input tensors of kernel may be the input of whole graph or const tensor.";
return RET_OK;
}
if (kernel->in_kernels().empty()) {
auto ret = InsertNode(nullptr, kernel, trans_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name() << " failed.";
return RET_ERROR;
}
}
for (auto in_kernel : kernel->in_kernels()) {
if (NPUPassUtils::IsNchw2Nhwc(in_kernel)) {
continue;
}
auto nhwc_shape = in_kernel->out_tensors()[0]->shape();
std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};

auto nh2nc_tensor =
new Tensor(in_kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR);
std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor};
all_tensors->push_back(nh2nc_tensors[0]);

auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR);
std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor};
all_tensors->push_back(nc2nh_tensors[0]);

auto nh2nc_name = in_kernel->name() + "_nh2nc_" + std::to_string(total++);
auto *nh2nc_kernel =
NPUPassUtils::CreateNhwc2NchwKernel(in_kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
trans_kernels->push_back(nh2nc_kernel);
insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());

auto nc2nh_name = in_kernel->name() + "_nc2nh_" + std::to_string(total++);
auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
trans_kernels->push_back(nc2nh_kernel);
insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());

NPUPassUtils::UpdateKernel(nh2nc_kernel, {in_kernel}, {nc2nh_kernel}, in_kernel->out_tensors(), nh2nc_tensors);
NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {kernel}, nh2nc_tensors, nc2nh_tensors);
NPUPassUtils::UpdateNH2NCTransNodePreKernel(in_kernel, nh2nc_kernel, kernel);
NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(in_kernel, nc2nh_kernel, kernel);
auto ret = InsertNode(in_kernel, kernel, trans_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name() << " failed.";
return RET_ERROR;
}
}
return RET_OK;
}

int NPUInsertTransformPass::InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels,
std::vector<Tensor *> *all_tensors) {
int NPUInsertTransformPass::InsertPostNodes(kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels) {
if (kernel->out_kernels().empty()) {
auto ret = InsertNode(kernel, nullptr, trans_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed.";
return RET_ERROR;
}
}
for (auto out_kernel : kernel->out_kernels()) {
if (NPUPassUtils::IsNhwc2Nchw(out_kernel)) {
continue;
}
auto nhwc_shape = kernel->out_tensors()[0]->shape();
std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};

auto nh2nc_tensor = new Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR);
std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor};
all_tensors->push_back(nh2nc_tensors[0]);

auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR);
std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor};
all_tensors->push_back(nc2nh_tensors[0]);

auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
trans_kernels->push_back(nh2nc_kernel);
insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());

auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
trans_kernels->push_back(nc2nh_kernel);
insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());

NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, out_kernel);
auto ret = InsertNode(kernel, out_kernel, trans_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed.";
return RET_ERROR;
}
}
return RET_OK;
}
@@ -123,15 +158,26 @@ int NPUInsertTransformPass::Run() {
continue;
}
auto insert_state = GetInsertState(kernel);
// If every output kernel is nhwc2nchw, insert
// modify loop index add post_kernels.size() to the next kernel in the origin vector
if (insert_state == PreInsert) {
std::vector<kernel::LiteKernel *> pre_kernels;
InsertPreNode(context_, kernel, &pre_kernels, all_tensors_);
auto ret = InsertPreNodes(kernel, &pre_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name() << " failed.";
return RET_ERROR;
}
all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end());
i += pre_kernels.size();
}

if (insert_state == PostInsert) {
std::vector<kernel::LiteKernel *> post_kernels;
InsertPostNode(context_, kernel, &post_kernels, all_tensors_);
auto ret = InsertPostNodes(kernel, &post_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed.";
return RET_ERROR;
}
all_kernels_->insert(all_kernels_->begin() + i + 1, post_kernels.begin(), post_kernels.end());
i += post_kernels.size();
}


+ 5
- 4
mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h View File

@@ -41,11 +41,12 @@ class NPUInsertTransformPass : public NPUBasePass {
int Run() override;

private:
int InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors);
int InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels);

int InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors);
int InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels);

int InsertNode(kernel::LiteKernel *kernel, kernel::LiteKernel *post_kernel,
std::vector<kernel::LiteKernel *> *trans_kernels);

private:
int total = 0;


+ 56
- 51
mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc View File

@@ -15,6 +15,7 @@
*/

#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/ops/transpose.h"
#include "nnacl/transpose.h"
#include "src/ops/populate/populate_register.h"
@@ -120,76 +121,80 @@ void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<ke

void NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *kernel) {
std::vector<kernel::LiteKernel *> out_kernels;

for (auto out_kernel : pre_kernel->out_kernels()) {
if (out_kernel == kernel) {
out_kernels.push_back(trans_kernel);
} else {
out_kernels.push_back(out_kernel);
// For kernel before trans, update the out_kernels; the output tensor of kernel is the input tensor of trans.
std::vector<kernel::LiteKernel *> out_kernels = pre_kernel->out_kernels();
for (size_t i = 0; i < out_kernels.size(); i++) {
if (out_kernels[i] == kernel) {
out_kernels[i] = trans_kernel;
break;
}
}
pre_kernel->set_out_kernels(out_kernels);
}

void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *post_kernel) {
std::vector<kernel::LiteKernel *> cur_out_kernels;
for (auto out_kernel : kernel->out_kernels()) {
if (out_kernel == post_kernel) {
cur_out_kernels.push_back(trans_kernel);
} else {
cur_out_kernels.push_back(out_kernel);
void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel,
std::vector<kernel::LiteKernel *> kernels) {
// For kernel before trans, there may be multiple outputs.
auto cur_out_kernels = pre_kernel->out_kernels();
for (size_t i = 0; i < kernels.size(); i++) {
auto itr = find(cur_out_kernels.begin(), cur_out_kernels.end(), kernels[i]);
if (itr != cur_out_kernels.end()) {
cur_out_kernels.erase(itr);
}
}
auto kernel_out_tensor = kernel->out_tensors()[0];
// Change format the output of the current kernel nhwc->nchw
auto nhwc_shape = kernel_out_tensor->shape();
std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};
kernel_out_tensor->set_format(schema::Format_NCHW);
kernel_out_tensor->set_shape(nchw_shape);
kernel->set_out_kernels(cur_out_kernels);
kernel->set_out_tensors({kernel_out_tensor});
cur_out_kernels.push_back(trans_kernel);
pre_kernel->set_out_kernels(cur_out_kernels);
// For kernel before trans, the output tensor is used for output tensor of trans, so replace the output tensor with
// the input tensor of trans.
pre_kernel->set_out_tensors(trans_kernel->in_tensors());
}

// Rewires post_kernel after an NHWC->NCHW trans kernel has been inserted in
// front of it: its first input tensor becomes the trans kernel's output, and
// its in-kernel list is replaced by {trans_kernel}.
void NPUPassUtils::UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel) {
  auto cur_in_tensors = post_kernel->in_tensors();
  // Only slot 0 is redirected; any remaining inputs (e.g. const tensors) stay as-is.
  cur_in_tensors[0] = trans_kernel->out_tensors()[0];
  post_kernel->set_in_tensors(cur_in_tensors);
  // NOTE(review): this discards any other in_kernels post_kernel previously had —
  // confirm callers only use this for kernels with a single producing in-kernel.
  post_kernel->set_in_kernels({trans_kernel});
}

void NPUPassUtils::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *pre_kernel) {
std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]};
for (int i = 1; i < kernel->in_tensors().size(); i++) {
cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]);
}
std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel};
for (int i = 1; i < kernel->in_kernels().size(); i++) {
auto in_kernel = kernel->in_kernels()[i];
if (in_kernel != kernel) {
cur_in_kernels.push_back(in_kernel);
void NPUPassUtils::UpdateNC2NHPostKernelInTensors(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *post_kernel) {
// For post_kernel that doesn't require insert trans kernel, because the output tensor of kernel(input tensor of
// trans_kernel) is updated, replace the input tensor of post_kernel.
auto post_in_tensors = post_kernel->in_tensors();
for (size_t i = 0; i < post_in_tensors.size(); i++) {
if (post_in_tensors[i] == kernel->out_tensors()[0]) {
post_in_tensors[i] = trans_kernel->in_tensors()[0];
break;
}
}
kernel->set_in_kernels(cur_in_kernels);
kernel->set_in_tensors({cur_kernel_in_tensors});
post_kernel->set_in_tensors(post_in_tensors);
}

void NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *post_kernel) {
std::vector<Tensor *> post_in_tensors;
for (auto post_in_tensor : post_kernel->in_tensors()) {
if (post_in_tensor != kernel->out_tensors()[0]) {
post_in_tensors.push_back(post_in_tensor);
} else {
post_in_tensors.push_back(trans_kernel->out_tensors()[0]);
void NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *post_kernel) {
// For post_kernel after trans, kernel should be replaced with trans_kernel.
auto post_in_tensors = post_kernel->in_tensors();
if (kernel == nullptr) {
post_in_tensors[0] = trans_kernel->out_tensors()[0];
} else {
for (size_t i = 0; i < post_in_tensors.size(); i++) {
if (post_in_tensors[i] == kernel->out_tensors()[0]) {
post_in_tensors[i] = trans_kernel->out_tensors()[0];
break;
}
}
}
post_kernel->set_in_tensors(post_in_tensors);
std::vector<kernel::LiteKernel *> post_in_kernels;
for (auto in_kernel : post_kernel->in_kernels()) {
if (in_kernel == kernel) {
post_in_kernels.push_back(trans_kernel);
} else {
post_in_kernels.push_back(in_kernel);

// The input tensor should be replaced with the output tensor of trans_kernel.
std::vector<kernel::LiteKernel *> post_in_kernels = post_kernel->in_kernels();
for (size_t i = 0; i < post_in_kernels.size(); i++) {
if (post_in_kernels[i] == kernel) {
post_in_kernels[i] = trans_kernel;
break;
}
}
post_kernel->set_in_kernels(post_in_kernels);
post_kernel->set_in_tensors({post_in_tensors});
}

bool NPUPassUtils::IsNhwc2Nchw(const kernel::LiteKernel *kernel) {


+ 8
- 6
mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h View File

@@ -38,14 +38,16 @@ class NPUPassUtils {
static void UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *kernel);

static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *post_kernel);
static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel,
std::vector<kernel::LiteKernel *> kernels);

static void UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *pre_kernel);
static void UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel);

static void UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *post_kernel);
static void UpdateNC2NHTransNodePostKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *post_kernel);

static void UpdateNC2NHPostKernelInTensors(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *post_kernel);

static bool IsNhwc2Nchw(const kernel::LiteKernel *kernel);



+ 69
- 41
mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc View File

@@ -20,10 +20,9 @@
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kNPU;
int NPUTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels,
std::vector<Tensor *> *all_tensors) {
int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels) {
bool is_input_kernel = kernel->in_kernels().empty();
// single input
if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU ||
npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) {
kernel::LiteKernel *pre_kernel = nullptr;
@@ -34,69 +33,86 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, kernel::LiteKer
// Create pre transform kernel's out tensor.
auto nhwc_shape = kernel->in_tensors()[0]->shape();
std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};
auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR);
auto tensor =
new (std::nothrow) Tensor(kernel->in_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR);
if (tensor == nullptr) {
MS_LOG(ERROR) << "New nchw tensor failed when inserting pre nhwc2nchw kernel.";
return RET_ERROR;
}
std::vector<Tensor *> pre_trans_out_tensors = {tensor};
all_tensors->push_back(pre_trans_out_tensors[0]);
all_tensors_->push_back(pre_trans_out_tensors[0]);

// Create pre transform kernel: Nhwc2Nchw
auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++);
auto *trans_kernel =
NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name);
NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context_, name);

trans_kernels->push_back(trans_kernel);
insert_primitive_.push_back(trans_kernel->GetPrimitive());

// Set in_kernels, out_kernels, in_tensors,out_tensors for transform kernel
std::vector<kernel::LiteKernel *> pre_trans_in_kernel;
if (is_input_kernel) {
pre_trans_in_kernel = {};
} else {
pre_trans_in_kernel = {pre_kernel};
// Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel
std::vector<kernel::LiteKernel *> pre_trans_in_kernels;
if (!is_input_kernel) {
pre_trans_in_kernels = {pre_kernel};
}
NPUPassUtils::UpdateKernel(trans_kernel, pre_trans_in_kernel, {kernel}, {kernel->in_tensors()[0]},
NPUPassUtils::UpdateKernel(trans_kernel, pre_trans_in_kernels, {kernel}, {kernel->in_tensors()[0]},
pre_trans_out_tensors);

if (pre_kernel != nullptr) {
NPUPassUtils::UpdateNH2NCTransNodePreKernel(pre_kernel, trans_kernel, kernel);
}
NPUPassUtils::UpdateNH2NCTransNodeAfterKernel(kernel, trans_kernel, pre_kernel);
NPUPassUtils::UpdateNH2NCTransNodePostKernel(trans_kernel, kernel);
}
return RET_OK;
}

int NPUTransformPass::InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels,
std::vector<Tensor *> *all_tensors) {
// Model output does not insert operator
if (kernel->out_kernels().empty()) {
return RET_OK;
}
// Single output multiple references
int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels) {
bool is_output_kernel = kernel->out_kernels().empty();
// Get the post kernel that need insert trans kernel.
// If no need for inserting trans kernel, the post kernel must be npu and in trans_nodes.
std::vector<kernel::LiteKernel *> post_insert_kernels;
for (int i = 0; i < kernel->out_kernels().size(); i++) {
auto post_kernel = kernel->out_kernels().at(i);
if (post_kernel->desc().arch == kNPU && npu_trans_nodes.find(post_kernel->Type()) != npu_trans_nodes.end()) {
continue;
auto post_kernel = kernel->out_kernels()[i];
if (post_kernel->desc().arch != kNPU || npu_trans_nodes.find(post_kernel->Type()) == npu_trans_nodes.end()) {
post_insert_kernels.push_back(post_kernel);
}

// Create post transform kernel's out tensor.
auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), kernel->out_tensors()[0]->shape(),
schema::Format_NHWC, Tensor::VAR);
std::vector<Tensor *> post_trans_out_tensors = {tensor};
all_tensors->push_back(post_trans_out_tensors[0]);
}
if (is_output_kernel || !post_insert_kernels.empty()) {
// Create post transform kernel's in tensor.
auto nhwc_shape = kernel->out_tensors()[0]->shape();
std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};
auto tensor =
new (std::nothrow) Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR);
if (tensor == nullptr) {
MS_LOG(ERROR) << "New nchw tensor failed when inserting post nchw2nhwc kernel.";
return RET_ERROR;
}
std::vector<Tensor *> post_trans_in_tensors = {tensor};
all_tensors_->push_back(tensor);
auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++);
tensor->set_tensor_name(name + "/input0");

// Create post transform kernel: Nchw2Nhwc
auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++);
auto *post_trans_kernel =
NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors, context, name);
NPUPassUtils::CreateNchw2NhwcKernel(post_trans_in_tensors, kernel->out_tensors(), context_, name);

// Set in_kernels, out_kernels, in_tensors,out_tensors for transform kernel
NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {post_kernel}, kernel->out_tensors(),
post_trans_out_tensors);
// Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel
NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, post_insert_kernels, post_trans_in_tensors,
kernel->out_tensors());
insert_primitive_.push_back(post_trans_kernel->GetPrimitive());

trans_kernels->push_back(post_trans_kernel);
NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, post_kernel);
NPUPassUtils::UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, post_kernel);

if (!is_output_kernel) {
for (int i = 0; i < kernel->out_kernels().size(); i++) {
auto post_kernel = kernel->out_kernels()[i];
if (find(post_insert_kernels.begin(), post_insert_kernels.end(), post_kernel) != post_insert_kernels.end()) {
NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, post_trans_kernel, post_kernel);
} else {
NPUPassUtils::UpdateNC2NHPostKernelInTensors(kernel, post_trans_kernel, post_kernel);
}
}
}
NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, post_insert_kernels);
}
return RET_OK;
}
@@ -108,13 +124,25 @@ int NPUTransformPass::Run() {
i++;
continue;
}
// insert pre_kernels before kernel in vector
// modify loop index add (pre_kernels.size() + 1) to the post_kernels insert location
std::vector<kernel::LiteKernel *> pre_kernels;
InsertPreNode(context_, kernel, &pre_kernels, all_tensors_);
auto ret = InsertPreNodes(kernel, &pre_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel before kernel " << kernel->name() << " failed.";
return RET_ERROR;
}
all_kernels_->insert(all_kernels_->begin() + i, pre_kernels.begin(), pre_kernels.end());
i += (pre_kernels.size() + 1);

// insert post_kernels after kernel in vector
// modify loop index add post_kernels.size() to the next kernel in the origin vector
std::vector<kernel::LiteKernel *> post_kernels;
InsertPostNode(context_, kernel, &post_kernels, all_tensors_);
ret = InsertPostNodes(kernel, &post_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nchw2nhwc kernel after kernel " << kernel->name() << " failed.";
return RET_ERROR;
}
all_kernels_->insert(all_kernels_->begin() + i, post_kernels.begin(), post_kernels.end());
i += post_kernels.size();
}


+ 2
- 4
mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.h View File

@@ -42,11 +42,9 @@ class NPUTransformPass : public NPUBasePass {
}

private:
int InsertPreNode(const InnerContext *context, kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors);
int InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels);

int InsertPostNode(const InnerContext *context, kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels, std::vector<Tensor *> *all_tensors);
int InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels);

private:
int total = 0;


+ 30
- 2
mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc View File

@@ -21,6 +21,9 @@

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::ActivationType_NO_ACTIVATION;
using mindspore::schema::ActivationType_RELU;
using mindspore::schema::ActivationType_RELU6;
using mindspore::schema::PrimitiveType_Add;
using mindspore::schema::PrimitiveType_Div;
using mindspore::schema::PrimitiveType_Equal;
@@ -118,7 +121,6 @@ int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
case PrimitiveType_GreaterEqual:
op = CreateOperator<hiai::op::GreaterEqual>(npu_inputs, name_);
break;

default:
MS_LOG(ERROR) << "Unsupported primitive type:"
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive_->Type()));
@@ -129,16 +131,42 @@ int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
return RET_ERROR;
}
op_ = op;

if (activation_type_ != ActivationType_NO_ACTIVATION) {
act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act");
if (act_ == nullptr) {
MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed.";
return RET_ERROR;
}
act_->set_input_x(*op_);
if (activation_type_ == ActivationType_RELU) {
act_->set_attr_mode(1);
} else if (activation_type_ == ActivationType_RELU6) {
act_->set_attr_mode(14);
} else {
MS_LOG(ERROR) << "Unsupport activation type for op " << name_;
return RET_ERROR;
}
}
return RET_OK;
}

ge::Operator *mindspore::kernel::ArithmeticNPUKernel::GetNPUOp() { return this->op_; }
ge::Operator *mindspore::kernel::ArithmeticNPUKernel::GetNPUOp() {
  // When an activation was fused in SetNPUInputs, the activation op is the
  // node's output; otherwise the arithmetic op itself is.
  return activation_type_ == ActivationType_NO_ACTIVATION ? op_ : act_;
}

ArithmeticNPUKernel::~ArithmeticNPUKernel() {
  // `delete` on a null pointer is a no-op, so no guards are needed.
  delete op_;
  op_ = nullptr;
  delete act_;
  act_ = nullptr;
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Mul, NPUKernelCreator<ArithmeticNPUKernel>)


+ 7
- 2
mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.h View File

@@ -17,15 +17,18 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_
#include <vector>
#include "nnacl/arithmetic.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/math_defs.h"
#include "include/graph/op/all_ops.h"
namespace mindspore::kernel {
class ArithmeticNPUKernel : public NPUKernel {
public:
ArithmeticNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {}
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
activation_type_ = reinterpret_cast<ArithmeticParameter *>(parameter)->activation_type_;
}
~ArithmeticNPUKernel() override;

int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
@@ -36,7 +39,9 @@ class ArithmeticNPUKernel : public NPUKernel {
ge::Operator *GetNPUOp() override;

private:
int activation_type_;
ge::Operator *op_ = nullptr;
hiai::op::Activation *act_ = nullptr;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ARITHMETIC_NPU_H_

+ 57
- 0
mindspore/lite/src/runtime/kernel/npu/squeeze_npu.cc View File

@@ -0,0 +1,57 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "src/runtime/kernel/npu/squeeze_npu.h"
#include "src/kernel_registry.h"
#include "src/runtime/agent/npu/npu_converter_utils.h"
using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Squeeze;

namespace mindspore::kernel {
// Squeeze only removes size-1 dimensions (shape-only op), so every tensor
// configuration reaching this kernel is supported on NPU.
int SqueezeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                OpParameter *opParameter) {
  return RET_OK;
}

// Creates the HiAI Squeeze operator, wires its single input and copies the
// axes attribute.
// Returns RET_OK on success, RET_ERROR when operator allocation fails.
int SqueezeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                   const std::vector<lite::Tensor *> &outputs,
                                   const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new (std::nothrow) hiai::op::Squeeze(name_);
  if (op_ == nullptr) {
    MS_LOG(ERROR) << "New squeeze npu operator for op " << name_ << " failed.";
    return RET_ERROR;
  }
  // Widen int axes to the int64_t attr type in one shot; this also avoids the
  // signed/unsigned mismatch of the original `int i < axes_.size()` loop.
  std::vector<int64_t> axes(axes_.begin(), axes_.end());
  op_->set_input_x(*npu_inputs[0]);
  op_->set_attr_axis(axes);
  return RET_OK;
}

// Definition lives inside the namespace, so the redundant full qualification
// of the original is dropped.
ge::Operator *SqueezeNPUKernel::GetNPUOp() { return this->op_; }

SqueezeNPUKernel::~SqueezeNPUKernel() {
  // `delete` on nullptr is a no-op; reset to avoid a dangling pointer.
  delete op_;
  op_ = nullptr;
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Squeeze, NPUKernelCreator<SqueezeNPUKernel>)
}  // namespace mindspore::kernel

+ 46
- 0
mindspore/lite/src/runtime/kernel/npu/squeeze_npu.h View File

@@ -0,0 +1,46 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SQUEEZE_NPU_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SQUEEZE_NPU_H_
#include <vector>
#include "src/ops/squeeze.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/all_ops.h"
namespace mindspore::kernel {
// NPU kernel wrapper for the Squeeze operator: removes the dimensions listed
// in `axes_` (taken from the lite Squeeze primitive) via hiai::op::Squeeze.
class SqueezeNPUKernel : public NPUKernel {
 public:
  SqueezeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
    // NOTE(review): assumes `primitive` is non-null and really a Squeeze —
    // confirm the kernel creator guarantees this before dereferencing.
    auto squeeze = reinterpret_cast<const mindspore::lite::Squeeze *>(primitive);
    axes_ = squeeze->GetAxis();
  }
  ~SqueezeNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  int SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                   const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Squeeze *op_ = nullptr;
  // Fix: the original declared unqualified `vector<int>`, which only compiles
  // if a `using` directive leaks in from an include — qualify explicitly.
  std::vector<int> axes_;
};
}  // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SQUEEZE_NPU_H_

+ 2
- 0
mindspore/lite/test/models_npu.cfg View File

@@ -16,6 +16,7 @@ mobilenet_v1_1.0_192.tflite 6
mobilenet_v1_1.0_224.tflite 2.5
mobilenet_v2_1.0_224.tflite 2.5
squeezenet.tflite 2.5
inception_resnet_v2.tflite 2
inception_v3.tflite 1
inception_v4.tflite 0.5
efficientnet_lite0_fp32_2.tflite 1
@@ -23,6 +24,7 @@ efficientnet_lite1_fp32_2.tflite 1
efficientnet_lite2_fp32_2.tflite 1
efficientnet_lite3_fp32_2.tflite 1
efficientnet_lite4_fp32_2.tflite 1
deeplabv3_1_default_1.tflite 2.5
6c_seg_nomean_20200610 1.5
ml_video_edit_person_divison 0.5
porseg_tmp.onnx 1 2

Loading…
Cancel
Save