|
|
|
@@ -15,6 +15,7 @@ |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h" |
|
|
|
#include <set> |
|
|
|
#include "src/runtime/opencl/opencl_executor.h" |
|
|
|
#include "src/runtime/opencl/opencl_runtime.h" |
|
|
|
#include "src/runtime/kernel/opencl/utils.h" |
|
|
|
@@ -26,23 +27,41 @@ namespace mindspore::kernel { |
|
|
|
// Destructor: delegates all teardown to UnInit(); its return code is not inspected here.
SubGraphOpenCLKernel::~SubGraphOpenCLKernel() {
  UnInit();
}
|
|
|
|
|
|
|
int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *> &in_tensors, |
|
|
|
const std::vector<kernel::LiteKernel *> in_kernels, |
|
|
|
const std::vector<std::vector<kernel::LiteKernel *>> in_kernels, |
|
|
|
std::vector<lite::tensor::Tensor *> *out_tensors, |
|
|
|
std::vector<OpenCLToFormatParameter *> *out_parameters, |
|
|
|
std::vector<LiteKernel *> *out_convert_ops, OpenCLMemType mem_type) { |
|
|
|
out_tensors->clear(); |
|
|
|
out_parameters->clear(); |
|
|
|
out_convert_ops->clear(); |
|
|
|
MS_ASSERT(in_tensors.size() == to_kernels.size()); |
|
|
|
MS_ASSERT(in_tensors.size() == from_kernels.size()); |
|
|
|
for (auto &iv : in_kernels) { |
|
|
|
for (auto &jv : iv) { |
|
|
|
OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(jv); |
|
|
|
schema::Format ori_format = cur_opencl_op->GetOriFormat(); |
|
|
|
auto tens = cur_opencl_op->out_tensors(); |
|
|
|
if (mem_type == OpenCLMemType::BUF && mem_type == cur_opencl_op->GetMemType() && |
|
|
|
tens[0]->GetFormat() == ori_format) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
if (mem_type == OpenCLMemType::IMG) { |
|
|
|
jv->set_in_tensors({}); |
|
|
|
} else { |
|
|
|
jv->set_out_tensors({}); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
for (size_t i = 0; i < in_tensors.size(); ++i) { |
|
|
|
OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(in_kernels[i]); |
|
|
|
OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(in_kernels[i][0]); |
|
|
|
schema::Format ori_format = cur_opencl_op->GetOriFormat(); |
|
|
|
if (mem_type == OpenCLMemType::BUF && mem_type == cur_opencl_op->GetMemType() && |
|
|
|
in_tensors[i]->GetFormat() == ori_format) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
auto dst_format = (mem_type == OpenCLMemType::IMG) ? in_kernels[i]->out_tensors()[0]->GetFormat() : ori_format; |
|
|
|
auto dst_format = (mem_type == OpenCLMemType::IMG) ? in_kernels[i][0]->out_tensors()[0]->GetFormat() : ori_format; |
|
|
|
auto src_format = |
|
|
|
(mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels[i]->out_tensors()[0]->GetFormat(); |
|
|
|
(mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels[i][0]->out_tensors()[0]->GetFormat(); |
|
|
|
lite::tensor::Tensor *new_tensor = new (std::nothrow) lite::tensor::Tensor(); |
|
|
|
MS_ASSERT(new_tensor); |
|
|
|
if (new_tensor == nullptr) { |
|
|
|
@@ -62,7 +81,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor * |
|
|
|
std::vector<int> dst_shape{shape[0], shape[2], shape[3], shape[1]}; |
|
|
|
new_tensor->set_shape(shape); |
|
|
|
} |
|
|
|
new_tensor->SetFormat(dst_format); |
|
|
|
new_tensor->SetFormat(in_kernels[i][0]->out_tensors()[0]->GetFormat()); |
|
|
|
out_tensors->emplace_back(new_tensor); |
|
|
|
#ifdef ENABLE_FP16 |
|
|
|
KernelKey desc{kGPU, kNumberTypeFloat16, schema::PrimitiveType_ToFormat}; |
|
|
|
@@ -94,13 +113,17 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor * |
|
|
|
} |
|
|
|
auto in_opencl_op = reinterpret_cast<OpenCLKernel *>(in_convert_op); |
|
|
|
if (mem_type == OpenCLMemType::IMG) { |
|
|
|
in_opencl_op->AddOutKernel(in_kernels[i]); |
|
|
|
reinterpret_cast<OpenCLKernel *>(in_kernels[i])->SetInKernel({in_convert_op}); |
|
|
|
reinterpret_cast<OpenCLKernel *>(in_kernels[i])->set_in_tensors({new_tensor}); |
|
|
|
for (auto &iv : in_kernels[i]) { |
|
|
|
in_opencl_op->AddOutKernel(iv); |
|
|
|
reinterpret_cast<OpenCLKernel *>(iv)->SetInKernel({in_convert_op}); |
|
|
|
reinterpret_cast<OpenCLKernel *>(iv)->set_in_tensors({new_tensor}); |
|
|
|
} |
|
|
|
} else { |
|
|
|
reinterpret_cast<OpenCLKernel *>(in_kernels[i])->SetOutKernel({in_convert_op}); |
|
|
|
reinterpret_cast<OpenCLKernel *>(in_kernels[i])->set_out_tensors({new_tensor}); |
|
|
|
in_convert_op->AddInKernel(in_kernels[i]); |
|
|
|
for (auto &iv : in_kernels[i]) { |
|
|
|
reinterpret_cast<OpenCLKernel *>(iv)->SetOutKernel({in_convert_op}); |
|
|
|
reinterpret_cast<OpenCLKernel *>(iv)->set_out_tensors({new_tensor}); |
|
|
|
in_convert_op->AddInKernel(iv); |
|
|
|
} |
|
|
|
} |
|
|
|
out_convert_ops->emplace_back(in_convert_op); |
|
|
|
} |
|
|
|
@@ -116,13 +139,19 @@ int SubGraphOpenCLKernel::Init() { |
|
|
|
for (const auto tensor : out_tensors_) { |
|
|
|
tensor->set_allocator(allocator_); |
|
|
|
} |
|
|
|
int ret = GenToFormatOp(in_tensors_, in_kernels_, &in_convert_tensors_, &in_parameters_, &in_convert_ops_, |
|
|
|
|
|
|
|
std::vector<std::vector<kernel::LiteKernel *>> from_kernels_; |
|
|
|
GetKernelFromToTensor(in_tensors_, in_kernels_, &from_kernels_, true); |
|
|
|
int ret = GenToFormatOp(in_tensors_, from_kernels_, &in_convert_tensors_, &in_parameters_, &in_convert_ops_, |
|
|
|
OpenCLMemType::IMG); |
|
|
|
if (ret != RET_OK) { |
|
|
|
return RET_ERROR; |
|
|
|
} |
|
|
|
nodes_.insert(nodes_.begin(), in_convert_ops_.begin(), in_convert_ops_.end()); |
|
|
|
ret = GenToFormatOp(out_tensors_, out_kernels_, &out_convert_tensors_, &out_parameters_, &out_convert_ops_, |
|
|
|
|
|
|
|
std::vector<std::vector<kernel::LiteKernel *>> to_kernels_; |
|
|
|
GetKernelFromToTensor(out_tensors_, out_kernels_, &to_kernels_, false); |
|
|
|
ret = GenToFormatOp(out_tensors_, to_kernels_, &out_convert_tensors_, &out_parameters_, &out_convert_ops_, |
|
|
|
OpenCLMemType::BUF); |
|
|
|
if (ret != RET_OK) { |
|
|
|
return RET_ERROR; |
|
|
|
@@ -144,6 +173,7 @@ int SubGraphOpenCLKernel::Init() { |
|
|
|
} |
|
|
|
return RET_OK; |
|
|
|
} |
|
|
|
|
|
|
|
int SubGraphOpenCLKernel::MallocTensorWithReuse() { |
|
|
|
kernel::LiteKernelUtil::InitTensorRefCount(nodes_); |
|
|
|
for (auto *kernel : nodes_) { |
|
|
|
@@ -195,10 +225,30 @@ int SubGraphOpenCLKernel::MallocTensorWithReuse() { |
|
|
|
return RET_OK; |
|
|
|
} |
|
|
|
|
|
|
|
int SubGraphOpenCLKernel::UnInit() { |
|
|
|
for (auto &tensor : out_tensors_) { |
|
|
|
allocator_->UnmapBuffer(tensor->Data()); |
|
|
|
/// Groups kernels by the tensors they are attached to.
///
/// For every tensor in `in_tensors` (in order) one entry is appended to `*out_kernels`.
/// That entry lists, in `in_kernels` order, each kernel whose tensor list contains the tensor.
/// An entry is empty when no kernel references the tensor.
///
/// @param in_tensors   Tensors to look up.
/// @param in_kernels   Candidate kernels to match against each tensor.
/// @param out_kernels  Output; one vector of kernels is appended per tensor. Existing content
///                     is kept (results are appended, not overwritten). Must not be null.
/// @param is_from      When true a kernel is matched through its `in_tensors()`, otherwise
///                     through its `out_tensors()`.
/// @return RET_OK on success, RET_ERROR when `out_kernels` is null.
int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::tensor::Tensor *> &in_tensors,
                                                const std::vector<kernel::LiteKernel *> &in_kernels,
                                                std::vector<std::vector<kernel::LiteKernel *>> *out_kernels,
                                                bool is_from) {
  if (out_kernels == nullptr) {
    return RET_ERROR;
  }

  // Build one lookup set per kernel up front so each tensor query below is a set lookup
  // instead of a linear scan over the kernel's tensor list.
  std::vector<std::set<lite::tensor::Tensor *>> ksets;
  ksets.reserve(in_kernels.size());
  for (auto *cur_kernel : in_kernels) {
    // Bind by const reference: no copy if the accessor returns a reference, and the
    // temporary's lifetime is extended if it returns by value.
    const auto &tens = is_from ? cur_kernel->in_tensors() : cur_kernel->out_tensors();
    // Construct the set directly inside ksets from the iterator range (no temporary set).
    ksets.emplace_back(tens.begin(), tens.end());
  }

  for (auto *tensor : in_tensors) {
    // Create the result slot first and fill it in place to avoid copying a local vector.
    out_kernels->emplace_back();
    auto &matched = out_kernels->back();
    for (size_t j = 0; j < in_kernels.size(); ++j) {
      if (ksets[j].count(tensor) != 0) {
        matched.emplace_back(in_kernels[j]);
      }
    }
  }
  return RET_OK;
}
|
|
|
|
|
|
|
int SubGraphOpenCLKernel::UnInit() { |
|
|
|
for (const auto tensor : in_tensors_) { |
|
|
|
if (tensor != nullptr) { |
|
|
|
tensor->FreeData(); |
|
|
|
@@ -206,12 +256,10 @@ int SubGraphOpenCLKernel::UnInit() { |
|
|
|
} |
|
|
|
for (const auto tensor : out_tensors_) { |
|
|
|
if (tensor != nullptr) { |
|
|
|
allocator_->UnmapBuffer(tensor->Data()); |
|
|
|
tensor->FreeData(); |
|
|
|
} |
|
|
|
} |
|
|
|
for (auto &tensor : out_tensors_) { |
|
|
|
allocator_->UnmapBuffer(tensor->Data()); |
|
|
|
} |
|
|
|
for (const auto tensor : in_convert_tensors_) { |
|
|
|
if (tensor != nullptr) { |
|
|
|
tensor->FreeData(); |
|
|
|
|