Browse Source

opencl_strided_slice

tags/v1.1.0
wangdongxu 5 years ago
parent
commit
c94563c06b
10 changed files with 852 additions and 609 deletions
  1. +0
    -146
      mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl
  2. +59
    -0
      mindspore/lite/src/runtime/kernel/opencl/cl/strided_slice.cl
  3. +0
    -106
      mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc
  4. +192
    -0
      mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc
  5. +15
    -4
      mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h
  6. +42
    -8
      mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
  7. +131
    -337
      mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc
  8. +317
    -0
      mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc
  9. +84
    -2
      mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc
  10. +12
    -6
      mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h

+ 0
- 146
mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl View File

@@ -1,146 +0,0 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define INT2 int2
#define INT4 int4
__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
// Slice for NHWC4 image layout: copies the sub-tensor starting at `begin` into
// `output`. One work-item handles a single (H, W) position and iterates over
// all output channel slices. sharedNoUpdiv carries the RAW channel begin (.x)
// and channel size (.y) — i.e. not divided by 4 — which decide how FLT4 lanes
// must be recombined when the slice start is not 4-aligned.
// NOTE(review): get_global_id(0) is unused; dims 1/2 map to H/W — matches the
// host-side global = {1, OH, OW}.
__kernel void slice_NHWC4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape,
                          INT4 begin, INT2 sharedNoUpdiv) {
  int X = get_global_id(1);  // output H index
  int Y = get_global_id(2);  // output W index
  if (X >= out_shape.y || Y >= out_shape.z) {
    return;
  }
  FLT4 result;
  if (sharedNoUpdiv.x % 4 == 0) {
    // Channel begin is 4-aligned: each output FLT4 is exactly one input FLT4.
    for (int i = 0; i < out_shape.w; i++) {
      result = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (i + begin.w), (X + begin.y)));
      WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i, (X)), result);
    }
  } else {
    // Channel begin is NOT 4-aligned: every output FLT4 straddles two adjacent
    // input FLT4s ("first" and "second"); lanes are picked by the offset
    // begin_postion = raw channel begin mod 4, and `first` is rolled forward
    // so each input slice is read only once per iteration.
    int begin_postion = sharedNoUpdiv.x % 4;
    FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + begin.w, (X + begin.y)));
    if (begin_postion == 1) {
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y)));
        result.x = first.y;
        result.y = first.z;
        result.z = first.w;
        result.w = second.x;
        WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result);
        first.y = second.y;
        first.z = second.z;
        first.w = second.w;
      }
    } else if (begin_postion == 2) {
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y)));
        result.x = first.z;
        result.y = first.w;
        result.z = second.x;
        result.w = second.y;
        WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result);
        first.z = second.z;
        first.w = second.w;
      }
    } else {
      // begin_postion == 3
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y)));
        result.x = first.w;
        result.y = second.x;
        result.z = second.y;
        result.w = second.z;
        WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result);
        first.w = second.w;
      }
    }
  }
  // If the raw output channel count is not a multiple of 4, rewrite the last
  // channel slice with its unused trailing lanes zeroed (padding hygiene).
  int size = sharedNoUpdiv.y % 4;
  FLT4 result_fill0;
  if (size == 1) {
    result_fill0.x = result.x;
    result_fill0.y = 0;
    result_fill0.z = 0;
    result_fill0.w = 0;
    WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0);
  } else if (size == 2) {
    result_fill0.x = result.x;
    result_fill0.y = result.y;
    result_fill0.z = 0;
    result_fill0.w = 0;
    WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0);
  } else if (size == 3) {
    result_fill0.x = result.x;
    result_fill0.y = result.y;
    result_fill0.z = result.z;
    result_fill0.w = 0;
    WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0);
  }
}
// Slice for NC4HW4 image layout — same algorithm as slice_NHWC4, but the
// channel-slice index addresses image rows ((slice * H + h)) instead of
// columns, matching the NC4HW4 memory arrangement.
__kernel void slice_NC4HW4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape,
                           INT4 begin, INT2 sharedNoUpdiv) {
  int X = get_global_id(1);  // output H index
  int Y = get_global_id(2);  // output W index
  if (X >= out_shape.y || Y >= out_shape.z) {
    return;
  }
  FLT4 result;
  if (sharedNoUpdiv.x % 4 == 0) {
    // Channel begin is 4-aligned: direct slice-to-slice copy.
    for (int i = 0; i < out_shape.w; i++) {
      result = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y)));
      WRITE_IMAGE(output, (INT2)((Y), (i * out_shape.y + X)), result);
    }
  } else {
    // Unaligned channel begin: recombine lanes from two consecutive input
    // slices, selected by begin_postion = raw channel begin mod 4.
    int begin_postion = sharedNoUpdiv.x % 4;
    FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (begin.w) * input_shape.y + (X + begin.y)));
    if (begin_postion == 1) {
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y)));
        result.x = first.y;
        result.y = first.z;
        result.z = first.w;
        result.w = second.x;
        WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result);
        first.y = second.y;
        first.z = second.z;
        first.w = second.w;
      }
    } else if (begin_postion == 2) {
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y)));
        result.x = first.z;
        result.y = first.w;
        result.z = second.x;
        result.w = second.y;
        WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result);
        first.z = second.z;
        first.w = second.w;
      }
    } else {
      // begin_postion == 3
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y)));
        result.x = first.w;
        result.y = second.x;
        result.z = second.y;
        result.w = second.z;
        WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result);
        first.w = second.w;
      }
    }
  }
  // Zero the unused trailing lanes of the final slice when the raw channel
  // size is not a multiple of 4.
  int size = sharedNoUpdiv.y % 4;
  FLT4 result_fill0 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);
  if (size == 1) {
    result_fill0.x = result.x;
    WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0);
  } else if (size == 2) {
    result_fill0.x = result.x;
    result_fill0.y = result.y;
    WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0);
  } else if (size == 3) {
    result_fill0.x = result.x;
    result_fill0.y = result.y;
    result_fill0.z = result.z;
    WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0);
  }
}

+ 59
- 0
mindspore/lite/src/runtime/kernel/opencl/cl/strided_slice.cl View File

@@ -0,0 +1,59 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;

// Generic strided slice over an NHWC4 image.
// Each work-item produces ONE output FLT4: it gathers its 4 scalar elements
// independently (each may come from a different input position because of
// stride/begin), then packs and writes them. This handles arbitrary begin,
// stride and channel alignment at the cost of 4 image reads per output FLT4.
__kernel void strided_slice(__read_only image2d_t input, __write_only image2d_t output, int4 input_shape,
                            int4 output_shape, int2 io_slices, int4 begin, int4 stride, int4 size) {
  int IN = input_shape.x, IH = input_shape.y, IW = input_shape.z, CI = input_shape.w;
  int ON = output_shape.x, OH = output_shape.y, OW = output_shape.z, CO = output_shape.w;
  int CI_SLICES = io_slices.x, CO_SLICES = io_slices.y;
  // dim 0 packs batch and height together: on_oh = on * OH + oh
  int on_oh = get_global_id(0);
  int ow = get_global_id(1);
  int co_slice = get_global_id(2);
  int on = on_oh / OH;
  int oh = on_oh % OH;
  if (on >= ON || oh >= OH || ow >= OW || co_slice >= CO_SLICES) {
    return;
  }

  FLT tmp[4];
  for (int i = 0; i < 4; ++i) {
    // output_shape idx -> size idx. because squeeze(output_shape)=squeeze(size)
    // for example:
    // python code: B = A[1, 1:16, 2:16, 3:16]
    // input_shape = [16, 16, 16, 16]
    // begin = [ 1, 1, 2, 3]
    // end = [ 2, 16, 16, 16]
    // stride = [ 1, 1, 1, 1]
    // size = [ 1, 15, 14, 13] = ceil((end - begin) / stride)
    // output_shape = [ 15, 14, 13]
    int idx = ((on * OH + oh) * OW + ow) * CO + co_slice * 4 + i;
    int co_ = idx % size.w;
    idx /= size.w;
    int ow_ = idx % size.z;
    idx /= size.z;
    int oh_ = idx % size.y;
    idx /= size.y;
    int on_ = idx;

    // map the logical output coordinate back to the input coordinate
    int in = begin.x + stride.x * on_;
    int ih = begin.y + stride.y * oh_;
    int iw = begin.z + stride.z * ow_;
    int ci = begin.w + stride.w * co_;

    // load the input FLT4 containing element ci, then select lane ci % 4
    FLT4 src = READ_IMAGE(input, smp_none, (int2)(iw * CI_SLICES + ci / 4, in * IH + ih));
    int offset = ci % 4;
    if (offset == 0) {
      tmp[i] = src.x;
    } else if (offset == 1) {
      tmp[i] = src.y;
    } else if (offset == 2) {
      tmp[i] = src.z;
    } else {
      tmp[i] = src.w;
    }
  }

  FLT4 out = (FLT4)(tmp[0], tmp[1], tmp[2], tmp[3]);
  WRITE_IMAGE(output, (int2)(ow * CO_SLICES + co_slice, on_oh), out);
}

+ 0
- 106
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc View File

@@ -1,106 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstring>
#include <string>
#include <algorithm>
#include <set>
#include "src/kernel_registry.h"
#include "src/runtime/kernel/opencl/kernel/slice.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/slice.cl.inc"

using mindspore::kernel::KERNEL_ARCH::kGPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Slice;

namespace mindspore::kernel {

int SliceOpenCLKernel::Init() {
  // Compile the "slice_NHWC4" kernel from the embedded CL source and cache it
  // in kernel_ for later launches.
  const std::string program_name = "slice";
  const std::string kernel_name = "slice_NHWC4";
  std::string source = slice_source;
  std::set<std::string> build_options;
  ocl_runtime_->LoadSource(program_name, source);
  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return RET_OK;
}

// Choose a local work-group size (x, y, z) whose product stays within
// max_size: x and y favour divisors of the corresponding global dims,
// z takes whatever budget remains.
// NOTE(review): function name keeps the historical "Slcie" spelling because
// callers reference it by this name.
void SlcieGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) {
  const int max_divider = 8;
  const int max_x = 4;
  const int max_y = 8;
  const int x = std::min(GetMaxDivisorStrategy1(global[0], max_divider), max_x);
  const int yz_budget = max_size / x;
  const int y = std::min(std::min(GetMaxDivisorStrategy1(global[1], max_divider), yz_budget), max_y);
  const int z = std::min(yz_budget / y, static_cast<int>(UP_DIV(global[2], 2)));
  *local = {static_cast<size_t>(x), static_cast<size_t>(y), static_cast<size_t>(z)};
}

int SliceOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<SliceParameter *>(this->op_parameter_);
auto input_shape = in_tensors_[0]->shape();
cl_int4 input_shape_ = {input_shape[0], input_shape[1], input_shape[2], UP_DIV(input_shape[3], C4NUM)};
cl_int4 size_ = {param->size_[0], param->size_[1], param->size_[2], UP_DIV(param->size_[3], C4NUM)};
cl_int4 begin_ = {param->begin_[0], param->begin_[1], param->begin_[2], param->begin_[3] / 4};
cl_int2 sharedNoUpdiv = {param->begin_[3], param->size_[3]};
uint32_t OH = param->size_[1];
uint32_t OW = param->size_[2];

const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
std::vector<size_t> local = {1, 1, 1}; // init local
std::vector<size_t> global = {1, OH, OW};
SlcieGetWorkGroup(global, &local, max_global[0]);
int arg_cn = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, size_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);

return RET_OK;
}

// Factory registered for PrimitiveType_Slice on GPU: builds and initializes a
// SliceOpenCLKernel. Returns nullptr on any failure; opParameter is freed only
// when allocation fails (on Init failure the kernel owns it).
kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                             const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                             const lite::InnerContext *ctx, const kernel::KernelKey &desc,
                                             const mindspore::lite::PrimitiveC *primitive) {
  auto *kernel = new (std::nothrow) SliceOpenCLKernel(opParameter, inputs, outputs);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << " new SliceOpenCLKernel failed ";
    free(opParameter);
    return nullptr;
  }
  if (kernel->Init() != RET_OK) {
    MS_LOG(ERROR) << " Init kernel failed, name: Slice ";
    delete kernel;
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Slice, OpenCLSliceKernelCreator);
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Slice, OpenCLSliceKernelCreator);
} // namespace mindspore::kernel

+ 192
- 0
mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc View File

@@ -0,0 +1,192 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <cmath>
#include <cstring>
#include <deque>
#include <iterator>
#include <set>
#include <string>
#include "src/kernel_registry.h"
#include "src/runtime/kernel/opencl/kernel/strided_slice.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/strided_slice.cl.inc"
#include "nnacl/strided_slice.h"

using mindspore::kernel::KERNEL_ARCH::kGPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Slice;
using mindspore::schema::PrimitiveType_StridedSlice;

namespace mindspore::kernel {

// Validate the kernel's constraints before Prepare(): exactly one input and
// one output tensor, a 1D-4D input and a 0D-4D output. InitConstArgs() is
// called here too so invalid begin/size/stride attributes are rejected early.
int SliceOpenCLKernel::CheckSpecs() {
  // one kernel class serves both Slice and StridedSlice; pick the name for logs
  const std::string kernel_name = op_parameter_->type_ == PrimitiveType_Slice ? "Slice" : "StridedSlice";
  if (in_tensors_.size() != 1) {
    MS_LOG(ERROR) << kernel_name + " only supports 1 input Tensor.";
    return RET_ERROR;
  }
  if (out_tensors_.size() != 1) {
    MS_LOG(ERROR) << kernel_name + " only supports 1 output Tensor.";
    return RET_ERROR;
  }
  auto in_ndim = in_tensors_.front()->shape().size();
  if (in_ndim == 0 || in_ndim > 4) {
    MS_LOG(ERROR) << kernel_name + " only supports 1D-4D input tensor";
    return RET_ERROR;
  }
  auto out_ndim = out_tensors_.front()->shape().size();
  if (out_ndim > 4) {
    MS_LOG(ERROR) << kernel_name + " only supports 0D-4D output tensor";
    return RET_ERROR;
  }
  if (InitConstArgs() != RET_OK) {
    MS_LOG(ERROR) << "call SliceOpenCLKernel::InitConstArgs() failed";
    return RET_ERROR;
  }
  return RET_OK;
}

int SliceOpenCLKernel::Prepare() {
std::set<std::string> build_options;
std::string program_name = "strided_slice";
ocl_runtime_->LoadSource(program_name, strided_slice_source);
ocl_runtime_->BuildKernel(kernel_, program_name, "strided_slice", build_options);
SetConstArgs();
SetGlobalLocal();
return RET_OK;
}

int SliceOpenCLKernel::InitConstArgs() {
auto input_info = Image2DInfo(in_tensors_.front());
auto output_info = Image2DInfo(out_tensors_.front());
input_shape_ = {static_cast<cl_int>(input_info.N), static_cast<cl_int>(input_info.H),
static_cast<cl_int>(input_info.W), static_cast<cl_int>(input_info.C)};
output_shape_ = {static_cast<cl_int>(output_info.N), static_cast<cl_int>(output_info.H),
static_cast<cl_int>(output_info.W), static_cast<cl_int>(output_info.C)};
io_slices_ = {static_cast<cl_int>(input_info.Slice), static_cast<cl_int>(output_info.Slice)};

if (op_parameter_->type_ == PrimitiveType_Slice) {
auto param = reinterpret_cast<SliceParameter *>(op_parameter_);
Broadcast2GpuShape(param->begin_, begin_.s, param->param_length_, 0);
Broadcast2GpuShape(param->size_, size_.s, param->param_length_, -1);
for (int i = 0; i < 4; ++i) {
if (begin_.s[i] < 0) {
begin_.s[i] += input_shape_.s[i];
}
if (begin_.s[i] < 0 || begin_.s[i] >= input_shape_.s[i]) {
MS_LOG(ERROR) << "Slice kernel only supports 0<=begin<input_shape but begin[i]=" << begin_.s[i]
<< " input_shape[i]=" << input_shape_.s[i];
return RET_ERROR;
}
if (size_.s[i] < -1 || size_.s[i] == 0) {
MS_LOG(ERROR) << "Slice kernel only supports size=-1 or size>0 but size[i]=" << size_.s[i];
return RET_ERROR;
}
if (size_.s[i] == -1 || begin_.s[i] + size_.s[i] > input_shape_.s[i]) {
size_.s[i] = input_shape_.s[i] - begin_.s[i];
}
}
} else {
auto param = reinterpret_cast<StridedSliceParameter *>(op_parameter_);
cl_int4 end = input_shape_;
Broadcast2GpuShape(param->begins_, begin_.s, param->num_axes_, 0);
Broadcast2GpuShape(param->strides_, stride_.s, param->num_axes_, 1);
Broadcast2GpuShape(param->ends_, end.s, param->num_axes_);

for (int i = 0; i < 4; ++i) {
// begin is negative
if (begin_.s[i] < 0) {
begin_.s[i] += input_shape_.s[i];
}
// avoid begin is out of range
begin_.s[i] = std::clamp(begin_.s[i], 0, input_shape_.s[i] - 1);
// end is negative
if (end.s[i] < 0) {
end.s[i] += input_shape_.s[i];
}
// avoid end is out of range
end.s[i] = std::clamp(end.s[i], -1, input_shape_.s[i]);

// check stride begin end
if (stride_.s[i] > 0) {
if (begin_.s[i] >= end.s[i]) {
MS_LOG(ERROR) << "StridedSlice kernel only supports begin_<end when stride>0";
return RET_ERROR;
}
} else if (stride_.s[i] < 0) {
if (begin_.s[i] <= end.s[i]) {
MS_LOG(ERROR) << "StridedSlice kernel only supports begin_>end when stride<0";
return RET_ERROR;
}
} else {
MS_LOG(ERROR) << "StridedSlice kernel only supports stride!=0";
return RET_ERROR;
}
size_.s[i] = std::ceil(static_cast<float>(end.s[i] - begin_.s[i]) / static_cast<float>(stride_.s[i]));
}
}

// check size
std::vector<int> shape_not_1;
std::vector<int> size_not_1;
std::copy_if(out_tensors_.front()->shape().begin(), out_tensors_.front()->shape().end(), shape_not_1.begin(),
[](int x) { return x > 1; });
std::copy_if(size_.s, size_.s + 4, size_not_1.begin(), [](int x) { return x > 1; });
if (shape_not_1 != size_not_1) {
MS_LOG(ERROR) << "Slice/StridedSlice kernel output shape infer error";
return RET_ERROR;
}
return RET_OK;
}

void SliceOpenCLKernel::SetConstArgs() {
  // Kernel args 0 and 1 are the input/output images, bound per-launch in
  // Run(); the precomputed constants occupy indices 2..7.
  ocl_runtime_->SetKernelArg(kernel_, 2, input_shape_);
  ocl_runtime_->SetKernelArg(kernel_, 3, output_shape_);
  ocl_runtime_->SetKernelArg(kernel_, 4, io_slices_);
  ocl_runtime_->SetKernelArg(kernel_, 5, begin_);
  ocl_runtime_->SetKernelArg(kernel_, 6, stride_);
  ocl_runtime_->SetKernelArg(kernel_, 7, size_);
}

void SliceOpenCLKernel::SetGlobalLocal() {
  // Global NDRange is (N*H, W, CO_SLICES). The local size is chosen so its
  // product fits the device work-group limit: the channel dimension gets a
  // divisor of CO_SLICES first, then H and W split the remaining budget.
  auto output_info = Image2DInfo(out_tensors_.front());
  std::vector<size_t> global = {output_info.N * output_info.H, output_info.W, output_info.Slice};

  const int max_divider = 8;
  const size_t max_work_group_size = ocl_runtime_->DeviceMaxWorkGroupSize();
  const size_t local_c = GetMaxDivisorStrategy0(global[2], max_divider);
  const size_t hw_budget = max_work_group_size / local_c;
  const size_t local_h = std::min(UP_DIV(global[0], 2), hw_budget);
  const size_t local_w = std::min(hw_budget / local_h, global[1]);
  std::vector<size_t> local = {local_h, local_w, local_c};
  AlignGlobalLocal(global, local);
}

int SliceOpenCLKernel::Run() {
  MS_LOG(DEBUG) << this->name() << " Running! ";
  // Only the tensor data pointers change between launches; shape constants
  // were bound once in Prepare() via SetConstArgs().
  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c());
  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c());
  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr);
  return RET_OK;
}

REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Slice, OpenCLKernelCreator<SliceOpenCLKernel>);
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Slice, OpenCLKernelCreator<SliceOpenCLKernel>);
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, OpenCLKernelCreator<SliceOpenCLKernel>);
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_StridedSlice, OpenCLKernelCreator<SliceOpenCLKernel>);
} // namespace mindspore::kernel

mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h → mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SLICE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SLICE_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_STRIDED_SLICE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_STRIDED_SLICE_H_

#include <vector>
#include "src/runtime/kernel/opencl/opencl_kernel.h"
@@ -31,12 +31,23 @@ class SliceOpenCLKernel : public OpenCLKernel {

~SliceOpenCLKernel() override = default;

int Init() override;

int Prepare() override;
int Run() override;

int CheckSpecs() override;
void SetConstArgs() override;
void SetGlobalLocal() override;

private:
int InitConstArgs();

cl::Kernel kernel_;
cl_int4 input_shape_{};
cl_int4 output_shape_{};
cl_int2 io_slices_{};
cl_int4 begin_{};
cl_int4 stride_{{1, 1, 1, 1}};
cl_int4 size_{};
};

} // namespace mindspore::kernel

+ 42
- 8
mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h View File

@@ -34,33 +34,67 @@ struct OpenCLToFormatParameter {
lite::opencl::MemType out_mem_type{lite::opencl::MemType::IMG};
};

// Map a 1D-4D shape onto the fixed GPU layout dst = {N, H, W, C}:
// 1D -> N, 2D -> N,C, 3D -> N,W,C, 4D -> N,H,W,C.
// Entries not covered by src keep their previous values (callers pre-fill
// defaults via the 4-argument overload). ndim >= 5 is unsupported and only
// logged; src_num <= 0 leaves dst untouched.
template <typename SrcT, typename DstT>
void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num) {
  switch (src_num) {
    case 1:
      dst[0] = src[0];  // N
      break;
    case 2:
      dst[0] = src[0];  // N
      dst[3] = src[1];  // C
      break;
    case 3:
      dst[0] = src[0];  // N
      dst[2] = src[1];  // W
      dst[3] = src[2];  // C
      break;
    case 4:
      dst[0] = src[0];  // N
      dst[1] = src[1];  // H
      dst[2] = src[2];  // W
      dst[3] = src[3];  // C
      break;
    default:
      if (src_num >= 5) {
        MS_LOG(ERROR) << "GPU doesn't support ndim>=" << src_num;
      }
      break;
  }
}

// Convenience overload: fill all four dst entries {N, H, W, C} with
// default_value first, then overwrite the dimensions actually present in src.
template <typename SrcT, typename DstT>
void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num, DstT default_value) {
  dst[0] = dst[1] = dst[2] = dst[3] = default_value;
  Broadcast2GpuShape(src, dst, src_num);
}

struct Image2DInfo {
explicit Image2DInfo(const lite::Tensor *tensor) {
if (tensor == nullptr) {
return;
}

auto shape = tensor->shape();
if (shape.size() == 1) {
auto ndim = shape.size();
if (ndim == 1) {
N = shape[0];
} else if (shape.size() == 2) {
} else if (ndim == 2) {
N = shape[0];
C = shape[1];
} else if (shape.size() == 3) {
} else if (ndim == 3) {
N = shape[0];
W = shape[1];
C = shape[2];
} else if (shape.size() == 4) {
} else if (ndim == 4) {
N = shape[0];
H = shape[1];
W = shape[2];
C = shape[3];
} else if (shape.size() >= 5) {
MS_LOG(ERROR) << "GPU dont't support Tensor with dim=" << shape.size();
} else if (ndim >= 5) {
MS_LOG(ERROR) << "GPU doesn't support Tensor with ndim>=" << ndim;
}
Slice = UP_DIV(C, C4NUM);

FLT_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half) : sizeof(cl_float);
FLT4_size = FLT_size * 4;
Slice = UP_DIV(C, C4NUM);
if (W * Slice <= MAX_IMAGE2D_SIZE) {
height = N * H;
width = W * Slice;


+ 131
- 337
mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc View File

@@ -13,357 +13,151 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include "src/common/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h"
#include "nnacl/slice_parameter.h"
#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h"

namespace mindspore {
class TestSliceOpenCLfp32 : public mindspore::CommonTest {
public:
TestSliceOpenCLfp32() {}
};
class TestSliceOpenCLfp16 : public mindspore::CommonTest {
public:
TestSliceOpenCLfp16() {}
};

// Element-wise comparison helper: fails the test if any |output - expected|
// exceeds err_bound.
// FIX: the loop index is now int to match `size` (the original compared
// `size_t i < int size`, a signed/unsigned mismatch that loops enormously for
// a negative size); the local was also renamed from `abs`, which shadowed the
// standard function of that name.
template <typename T>
void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bound) {
  for (int i = 0; i < size; i++) {
    T diff = fabs(output_data[i] - correct_data[i]);
    ASSERT_LE(diff, err_bound);
  }
}

TEST_F(TestSliceOpenCLfp32, Slicefp32CI) {
MS_LOG(INFO) << " begin test ";
auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance();
ocl_runtime->Init();
auto allocator = ocl_runtime->GetAllocator();

MS_LOG(INFO) << " Read tensors from .bin ";
std::vector<int> input_shape = {1, 2, 2, 8};
std::vector<int> output_shape = {1, 2, 2, 5};
std::vector<int> begin = {0, 0, 0, 2};
std::vector<int> size = {1, 2, 2, 5};
auto data_type = kNumberTypeFloat32;
auto tensor_type = lite::Tensor::CONST_TENSOR;

float input_data[] = {-0.45816937, 0.92391545, -0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958,
0.09470133, 0.19801073, 0.04927751, -1.2808367, 0.1470597, 0.03393711, -0.33282498,
-1.0433807, -1.3678077, -0.6423931, 0.5584889, 0.28965706, 0.5343769, 0.75480366,
-1.9328151, -0.48714373, 1.711132, -1.8871949, -0.2987629, -0.14000037, -0.080552,
0.95056856, -0.06886655, 0.5316237, 0.05787678};
float correct_data[] = {-0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, -1.2808367, 0.1470597,
0.03393711, -0.33282498, -1.0433807, 0.28965706, 0.5343769, 0.75480366, -1.9328151,
-0.48714373, -0.14000037, -0.080552, 0.95056856, -0.06886655, 0.5316237};
MS_LOG(INFO) << " construct tensors ";
lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type);
if (tensor_data == nullptr) {
MS_LOG(INFO) << " init tensor failed ";
return;
}
auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type);
if (output_tensor == nullptr) {
delete tensor_data;
MS_LOG(INFO) << " init tensor failed ";
return;
}
std::vector<lite::Tensor *> inputs = {tensor_data};
std::vector<lite::Tensor *> outputs = {output_tensor};
class TestSliceOpenCL : public mindspore::CommonTest {};

MS_LOG(INFO) << "setting SliceParameter ";
auto param = reinterpret_cast<SliceParameter *>(malloc(sizeof(SliceParameter)));
OpParameter *GetSliceParameter(const std::vector<int> &begin, const std::vector<int> &size) {
auto param = static_cast<SliceParameter *>(malloc(sizeof(SliceParameter)));
if (param == nullptr) {
for (auto tensor : inputs) {
delete tensor;
}
for (auto tensor : outputs) {
delete tensor;
}
MS_LOG(INFO) << "new SliceParameter failed ";
return;
MS_LOG(ERROR) << "SliceParameter create error.";
return nullptr;
}
for (int i = 0; i < input_shape.size(); i++) {
param->op_parameter_.type_ = schema::PrimitiveType_Slice;
param->param_length_ = begin.size();
for (int i = 0; i < begin.size(); ++i) {
param->begin_[i] = begin[i];
param->size_[i] = size[i];
}

auto *slice_kernel =
new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
if (slice_kernel == nullptr) {
for (auto tensor : inputs) {
delete tensor;
}
for (auto tensor : outputs) {
delete tensor;
}
delete param;
MS_LOG(INFO) << "new kernel::slice_kernel failed ";
return;
}
slice_kernel->Init();

// to do allocate memory for inputs and outputs
for (auto &input_tensor : inputs) {
input_tensor->MallocData(allocator);
}

MS_LOG(INFO) << " initialize sub_graph ";
std::vector<kernel::LiteKernel *> kernels{slice_kernel};
auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
if (sub_graph == nullptr) {
for (auto tensor : inputs) {
delete tensor;
}
for (auto tensor : outputs) {
delete tensor;
}
delete param;
delete slice_kernel;
MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed ";
return;
}
sub_graph->Init();

MS_LOG(INFO) << " init tensors ";
memcpy(inputs[0]->data_c(), input_data, sizeof(input_data));

std::cout << "==================output data================" << std::endl;
sub_graph->Run();

auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c());
CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001);
for (auto tensor : inputs) {
tensor->set_data(nullptr);
delete tensor;
}
for (auto tensor : outputs) {
tensor->set_data(nullptr);
delete tensor;
}
delete sub_graph;
return reinterpret_cast<OpParameter *>(param);
}

TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) {
MS_LOG(INFO) << " begin test ";
auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance();
ocl_runtime->Init();
auto allocator = ocl_runtime->GetAllocator();

MS_LOG(INFO) << " Read tensors from .bin ";
std::vector<int> input_shape = {1, 19, 19, 96};
std::vector<int> output_shape = {1, 10, 10, 13};
std::vector<int> begin = {0, 2, 3, 4};
std::vector<int> size = {1, 10, 10, 13};
auto data_type = kNumberTypeFloat32;
auto tensor_type = lite::Tensor::CONST_TENSOR;

// get the input from .bin
size_t input_size, output_size;
std::string input_path = "./test_data/in_slicefp32.bin";
std::string output_path = "./test_data/out_slicefp32.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
auto correct_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size));
MS_LOG(INFO) << " construct tensors ";
lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type);
if (tensor_data == nullptr) {
MS_LOG(INFO) << " init tensor failed ";
return;
}
auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type);
if (output_tensor == nullptr) {
delete tensor_data;
MS_LOG(INFO) << " init tensor failed ";
return;
}
std::vector<lite::Tensor *> inputs = {tensor_data};
std::vector<lite::Tensor *> outputs = {output_tensor};

MS_LOG(INFO) << "setting SliceParameter ";
auto param = reinterpret_cast<SliceParameter *>(malloc(sizeof(SliceParameter)));
if (param == nullptr) {
for (auto tensor : inputs) {
delete tensor;
}
for (auto tensor : outputs) {
delete tensor;
}
MS_LOG(INFO) << "new SliceParameter failed ";
return;
}
for (int i = 0; i < input_shape.size(); i++) {
param->begin_[i] = begin[i];
param->size_[i] = size[i];
}

auto *slice_kernel =
new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
if (slice_kernel == nullptr) {
for (auto tensor : inputs) {
delete tensor;
}
for (auto tensor : outputs) {
delete tensor;
}
delete param;
MS_LOG(INFO) << "new kernel::slice_kernel failed ";
return;
}
slice_kernel->Init();

// to do allocate memory for inputs and outputs
for (auto &input_tensor : inputs) {
input_tensor->MallocData(allocator);
}

MS_LOG(INFO) << " initialize sub_graph ";
std::vector<kernel::LiteKernel *> kernels{slice_kernel};
auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
if (sub_graph == nullptr) {
for (auto tensor : inputs) {
delete tensor;
}
for (auto tensor : outputs) {
delete tensor;
}
delete param;
delete slice_kernel;
MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed ";
return;
}
sub_graph->Init();

MS_LOG(INFO) << " init tensors ";
memcpy(inputs[0]->data_c(), input_data, input_size);

std::cout << "==================output data================" << std::endl;
sub_graph->Run();

auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c());
CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001);
for (auto tensor : inputs) {
tensor->set_data(nullptr);
delete tensor;
}
for (auto tensor : outputs) {
tensor->set_data(nullptr);
delete tensor;
}
delete sub_graph;
// Slice of a 1x2x2x8 NHWC tensor with begin={0,0,0,2}, size={1,2,2,5}:
// for every (h, w) position the channels [2, 7) are kept. expect_data is the
// corresponding subset of input_data.
TEST_F(TestSliceOpenCL, 4D) {
  float input_data[] = {-0.45816937, 0.92391545, -0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958,
                        0.09470133, 0.19801073, 0.04927751, -1.2808367, 0.1470597, 0.03393711, -0.33282498,
                        -1.0433807, -1.3678077, -0.6423931, 0.5584889, 0.28965706, 0.5343769, 0.75480366,
                        -1.9328151, -0.48714373, 1.711132, -1.8871949, -0.2987629, -0.14000037, -0.080552,
                        0.95056856, -0.06886655, 0.5316237, 0.05787678};
  float expect_data[] = {-0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, -1.2808367, 0.1470597,
                         0.03393711, -0.33282498, -1.0433807, 0.28965706, 0.5343769, 0.75480366, -1.9328151,
                         -0.48714373, -0.14000037, -0.080552, 0.95056856, -0.06886655, 0.5316237};
  auto param = GetSliceParameter({0, 0, 0, 2}, {1, 2, 2, 5});
  TestMain({{{1, 2, 2, 8}, input_data, Tensor::Category::VAR}}, {{1, 2, 2, 5}, expect_data}, param, false);
}

TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
MS_LOG(INFO) << " begin test ";
auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance();
ocl_runtime->SetFp16Enable(true);
ocl_runtime->Init();
auto allocator = ocl_runtime->GetAllocator();

MS_LOG(INFO) << " Read tensors from .bin ";
std::vector<int> input_shape = {1, 25, 25, 48};
std::vector<int> output_shape = {1, 24, 24, 15};
std::vector<int> begin = {0, 1, 1, 7};
std::vector<int> size = {1, 24, 24, 15};
auto data_type = kNumberTypeFloat16;
auto tensor_type = lite::Tensor::CONST_TENSOR;

// get the input from .bin
size_t input_size, output_size;
std::string input_path = "./test_data/in_slicefp16.bin";
std::string output_path = "./test_data/out_slicefp16.bin";
auto input_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
auto correct_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size));

MS_LOG(INFO) << " construct tensors ";
lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type);
if (tensor_data == nullptr) {
MS_LOG(INFO) << " init tensor failed ";
return;
}
auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type);
if (output_tensor == nullptr) {
delete tensor_data;
MS_LOG(INFO) << " init tensor failed ";
return;
TEST_F(TestSliceOpenCL, tflite_cpu) {
std::vector<std::tuple<std::string, std::vector<int>, std::vector<int>, std::vector<float>, std::vector<float>,
std::vector<int>, std::vector<int>>>
cases = {{"In1D", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {1}, {2}},
{"In2D", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {4, 5}, {1, 0}, {1, 2}},
{"In3D",
{2, 3, 2},
{2, 3, 2},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
{0, 0, 0},
{2, 3, 2}},
{"InputFloat", {4, 1, 1, 1}, {3, 1, 1, 1}, {1, 2, 3, 4}, {2, 3, 4}, {1, 0, 0, 0}, {3, 1, 1, 1}},
{"IndexInt64", {4, 1, 1, 1}, {3, 1, 1, 1}, {1, 2, 3, 4}, {2, 3, 4}, {1, 0, 0, 0}, {3, 1, 1, 1}},
{"InputInteger1",
{3, 2, 3, 1},
{1, 1, 3, 1},
{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6},
{3, 3, 3},
{1, 0, 0, 0},
{1, 1, 3, 1}},
{"InputInteger2",
{3, 2, 3, 1},
{1, 2, 3, 1},
{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6},
{3, 3, 3, 4, 4, 4},
{1, 0, 0, 0},
{1, 2, 3, 1}},
{"InputInteger3",
{3, 2, 3, 1},
{2, 1, 3, 1},
{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6},
{3, 3, 3, 5, 5, 5},
{1, 0, 0, 0},
{2, 1, 3, 1}},
{"SizeMinus1",
{3, 2, 3, 1},
{2, 1, 3, 1},
{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6},
{3, 3, 3, 5, 5, 5},
{1, 0, 0, 0},
{2, 1, -1, 1}},
{"BeginNonZeroSizeMinus1Axis1",
{3, 3, 2, 1},
{2, 2, 1, 1},
{1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9},
{5, 6, 8, 9},
{1, 1, 0, 0},
{2, -1, 1, 1}},
{"BeginNonZeroSizeMinus1Axis2",
{3, 2, 3, 1},
{2, 1, 2, 1},
{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6},
{3, 3, 5, 5},
{1, 0, 1, 0},
{2, 1, -1, 1}},
{"BeginNonZeroSizeMinus1Axis3",
{3, 1, 2, 3},
{2, 1, 1, 2},
{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6},
{3, 3, 5, 5},
{1, 0, 0, 1},
{2, 1, 1, -1}},
{"SliceUint8",
{3, 2, 3, 1},
{2, 1, 3, 1},
{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6},
{3, 3, 3, 5, 5, 5},
{1, 0, 0, 0},
{2, 1, -1, 1}},
{"SliceInt8",
{3, 2, 3, 1},
{2, 1, 3, 1},
{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6},
{3, 3, 3, 5, 5, 5},
{1, 0, 0, 0},
{2, 1, -1, 1}},
{"SliceInt16",
{3, 2, 3, 1},
{2, 1, 3, 1},
{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6},
{3, 3, 3, 5, 5, 5},
{1, 0, 0, 0},
{2, 1, -1, 1}},
{"SliceInt64",
{3, 2, 3, 1},
{2, 1, 3, 1},
{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6},
{3, 3, 3, 5, 5, 5},
{1, 0, 0, 0},
{2, 1, -1, 1}}};

for (auto &case_ : cases) {
auto &name = std::get<0>(case_);
auto &input_shape = std::get<1>(case_);
auto &output_shape = std::get<2>(case_);
auto &input_data = std::get<3>(case_);
auto &expect_data = std::get<4>(case_);
auto &begin = std::get<5>(case_);
auto &size = std::get<6>(case_);

std::cout << name << std::endl;
auto *param = GetSliceParameter(begin, size);
TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param,
false);
param = GetSliceParameter(begin, size);
TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param,
true);
}
std::vector<lite::Tensor *> inputs = {tensor_data};
std::vector<lite::Tensor *> outputs = {output_tensor};

MS_LOG(INFO) << " setting SliceParameter ";
auto param = reinterpret_cast<SliceParameter *>(malloc(sizeof(SliceParameter)));
if (param == nullptr) {
for (auto tensor : inputs) {
delete tensor;
}
for (auto tensor : outputs) {
delete tensor;
}
MS_LOG(INFO) << " new SliceParameter failed ";
return;
}
for (int i = 0; i < input_shape.size(); i++) {
param->begin_[i] = begin[i];
param->size_[i] = size[i];
}

auto *slice_kernel =
new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
if (slice_kernel == nullptr) {
for (auto tensor : inputs) {
delete tensor;
}
for (auto tensor : outputs) {
delete tensor;
}
delete param;
MS_LOG(INFO) << " new kernel::slice_kernel failed ";
return;
}
slice_kernel->Init();

// to do allocate memory for inputs and outputs
for (auto &input_tensor : inputs) {
input_tensor->MallocData(allocator);
}

MS_LOG(INFO) << " initialize sub_graph ";
std::vector<kernel::LiteKernel *> kernels{slice_kernel};
auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
if (sub_graph == nullptr) {
for (auto tensor : inputs) {
delete tensor;
}
for (auto tensor : outputs) {
delete tensor;
}
delete param;
delete slice_kernel;
MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed ";
return;
}
sub_graph->Init();

MS_LOG(INFO) << " init tensors ";
memcpy(inputs[0]->data_c(), input_data, input_size);
} // namespace mindspore

std::cout << "==================output data================" << std::endl;
sub_graph->Run();
auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c());
CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001);
for (auto tensor : inputs) {
tensor->set_data(nullptr);
delete tensor;
}
for (auto tensor : outputs) {
tensor->set_data(nullptr);
delete tensor;
}
delete sub_graph;
}
} // namespace mindspore

+ 317
- 0
mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc View File

@@ -0,0 +1,317 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/common_test.h"
#include "nnacl/strided_slice.h"
#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h"

namespace mindspore {

class TestStridedSliceOpenCL : public mindspore::CommonTest {};

// Builds a heap-allocated StridedSliceParameter from per-axis begin/end/stride
// vectors. All three vectors must have the same length (one entry per axis).
// Ownership of the returned OpParameter* passes to the caller (in these tests
// it is handed to TestMain / the created kernel). Returns nullptr when the
// allocation fails.
OpParameter *GetStridedSliceParameter(const std::vector<int> &begins, const std::vector<int> &ends,
                                      const std::vector<int> &strides) {
  auto param = static_cast<StridedSliceParameter *>(malloc(sizeof(StridedSliceParameter)));
  if (param == nullptr) {
    MS_LOG(ERROR) << "create StridedSliceParameter error.";
    return nullptr;
  }
  param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice;
  param->num_axes_ = static_cast<int>(begins.size());  // explicit narrowing: size_t -> int
  // size_t index avoids the signed/unsigned comparison against vector::size().
  for (size_t i = 0; i < begins.size(); ++i) {
    param->begins_[i] = begins[i];
    param->ends_[i] = ends[i];
    param->strides_[i] = strides[i];
  }
  return reinterpret_cast<OpParameter *>(param);
}

// Flat 36-element tensor, indices 3..35 with stride 3 -> 11 elements.
TEST_F(TestStridedSliceOpenCL, 1D) {
  float in_data[36];
  for (int i = 0; i < 36; ++i) {
    in_data[i] = static_cast<float>(i);
  }
  float golden[] = {3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33};
  OpParameter *slice_param = GetStridedSliceParameter({3}, {36}, {3});
  TestMain({{{36}, in_data, Tensor::Category::VAR}}, {{11}, golden}, slice_param, false);
}

// 4x9 input: rows [1,3) step 2, cols [2,8) step 3 -> {11, 14}.
TEST_F(TestStridedSliceOpenCL, 2D) {
  float in_data[36];
  for (int i = 0; i < 36; ++i) {
    in_data[i] = static_cast<float>(i);
  }
  float golden[] = {11, 14};
  OpParameter *slice_param = GetStridedSliceParameter({1, 2}, {3, 8}, {2, 3});
  TestMain({{{4, 9}, in_data, Tensor::Category::VAR}}, {{1, 2}, golden}, slice_param, false);
}

// Same slice as the 2D case, with a leading size-1 batch axis.
TEST_F(TestStridedSliceOpenCL, 3D) {
  float in_data[36];
  for (int i = 0; i < 36; ++i) {
    in_data[i] = static_cast<float>(i);
  }
  float golden[] = {11, 14};
  OpParameter *slice_param = GetStridedSliceParameter({0, 1, 2}, {1, 3, 8}, {1, 2, 3});
  TestMain({{{1, 4, 9}, in_data, Tensor::Category::VAR}}, {{1, 1, 2}, golden}, slice_param, false);
}

// 4-D strided slices with all-ones strides over a {2,2,3,3} tensor: an
// identity copy first (fp32 and fp16), then progressively smaller slices
// down to a single element.
TEST_F(TestStridedSliceOpenCL, 4D) {
  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
                        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35};

  // Identity slice: whole tensor is copied through, checked in fp32...
  float expect_data0[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
                          18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35};
  auto *param = GetStridedSliceParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1});
  TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 3, 3}, expect_data0}, param, false);

  // ...and again in fp16 (a fresh parameter is built for the second run).
  param = GetStridedSliceParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1});
  TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 3, 3}, expect_data0}, param, true);

  // Drop the first half of axis 0 -> second {1,2,3,3} block.
  float expect_data1[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35};
  param = GetStridedSliceParameter({1, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1});
  TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 2, 3, 3}, expect_data1}, param, false);

  // Begin {1,1,0,0} -> the last 3x3 plane.
  float expect_data2[] = {27, 28, 29, 30, 31, 32, 33, 34, 35};
  param = GetStridedSliceParameter({1, 1, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1});
  TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 3, 3}, expect_data2}, param, false);

  // Begin {1,1,2,0} -> the last row of that plane.
  float expect_data3[] = {33, 34, 35};
  param = GetStridedSliceParameter({1, 1, 2, 0}, {2, 2, 3, 3}, {1, 1, 1, 1});
  TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 1, 3}, expect_data3}, param, false);

  // Begin {1,1,2,1}, end {2,2,3,2} -> a single element.
  float expect_data4[] = {34};
  param = GetStridedSliceParameter({1, 1, 2, 1}, {2, 2, 3, 2}, {1, 1, 1, 1});
  TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 1, 1}, expect_data4}, param, false);
}

// Strides greater than 1 on three axes at once over a {1,4,3,3} tensor.
TEST_F(TestStridedSliceOpenCL, 4D_stride2) {
  float in_data[36];
  for (int i = 0; i < 36; ++i) {
    in_data[i] = static_cast<float>(i);
  }
  float golden[] = {13, 14, 31, 32};
  OpParameter *slice_param = GetStridedSliceParameter({0, 1, 1, 1}, {1, 4, 3, 3}, {2, 2, 2, 1});
  TestMain({{{1, 4, 3, 3}, in_data, Tensor::Category::VAR}}, {{1, 2, 1, 2}, golden}, slice_param, false);
}

// 4-D input sliced into a 3-D output shape ({2,2,3,3} -> {2,2,2}).
TEST_F(TestStridedSliceOpenCL, 4D_to_3D) {
  float in_data[36];
  for (int i = 0; i < 36; ++i) {
    in_data[i] = static_cast<float>(i);
  }
  float golden[] = {18, 20, 21, 23, 27, 29, 30, 32};
  OpParameter *slice_param = GetStridedSliceParameter({1, 0, 0, 0}, {2, 2, 2, 3}, {1, 1, 1, 2});
  TestMain({{{2, 2, 3, 3}, in_data, Tensor::Category::VAR}}, {{2, 2, 2}, golden}, slice_param, false);
}

// An out-of-range begin (5 on a length-4 tensor) with stride -1 walks
// backwards from the last element: expected output {4, 3, 2}.
TEST_F(TestStridedSliceOpenCL, In1D_OutOfRangeBeginNegativeStride) {
  float in_data[] = {1, 2, 3, 4};
  float golden[] = {4, 3, 2};
  OpParameter *slice_param = GetStridedSliceParameter({5}, {0}, {-1});
  TestMain({{{4}, in_data, Tensor::Category::VAR}}, {{3}, golden}, slice_param, false);
}

// Ported TFLite CPU StridedSlice cases. Each tuple is:
//   {name, input_shape, output_shape, input_data, expect_data, begin, end, stride}
// Every case is run twice: once in fp32 and once in fp16.
TEST_F(TestStridedSliceOpenCL, tflite_cpu) {
  std::vector<float> values(32768);
  // size_t index avoids the signed/unsigned comparison against vector::size().
  for (size_t i = 0; i < values.size(); ++i) {
    values[i] = i % 1000;
  }
  std::vector<std::tuple<std::string, std::vector<int>, std::vector<int>, std::vector<float>, std::vector<float>,
                         std::vector<int>, std::vector<int>, std::vector<int>>>
    cases = {{"In1D", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {1}, {3}, {1}},
             {"In1D_Int32End", {32768}, {32768}, values, values, {0}, {32768}, {1}},
             {"In1D_NegativeBegin", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {-3}, {3}, {1}},
             {"In1D_OutOfRangeBegin", {4}, {3}, {1, 2, 3, 4}, {1, 2, 3}, {-5}, {3}, {1}},
             {"In1D_NegativeEnd", {4}, {1}, {1, 2, 3, 4}, {2}, {1}, {-2}, {1}},
             {"In1D_OutOfRangeEnd", {4}, {3}, {1, 2, 3, 4}, {2, 3, 4}, {-3}, {5}, {1}},
             {"In1D_NegativeBeginNegativeStride", {4}, {1}, {1, 2, 3, 4}, {3}, {-2}, {-3}, {-1}},
             {"In1D_OutOfRangeBeginNegativeStride", {4}, {1}, {1, 2, 3, 4}, {4}, {5}, {2}, {-1}},
             {"In1D_NegativeEndNegativeStride", {4}, {2}, {1, 2, 3, 4}, {3, 2}, {2}, {-4}, {-1}},
             {"In1D_OutOfRangeEndNegativeStride", {4}, {2}, {1, 2, 3, 4}, {2, 1}, {-3}, {-5}, {-1}},
             {"In1D_NegStride", {3}, {3}, {1, 2, 3}, {3, 2, 1}, {-1}, {-4}, {-1}},
             {"In1D_EvenLenStride2", {2}, {1}, {1, 2}, {1}, {0}, {2}, {2}},
             {"In1D_OddLenStride2", {3}, {2}, {1, 2, 3}, {1, 3}, {0}, {3}, {2}},
             {"In2D_Identity", {2, 3}, {2, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {0, 0}, {2, 3}, {1, 1}},
             {"In2D", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {4, 5}, {1, 0}, {2, 2}, {1, 1}},
             {"In2D_Stride2", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {1, 3}, {0, 0}, {2, 3}, {2, 2}},
             {"In2D_NegStride", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {6, 5, 4}, {1, -1}, {2, -4}, {2, -1}},
             {"In2D_BeginMask", {2, 3}, {2, 2}, {1, 2, 3, 4, 5, 6}, {1, 2, 4, 5}, {0, 0}, {2, 2}, {1, 1}},
             {"In2D_EndMask", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {4, 5, 6}, {1, 0}, {2, 3}, {1, 1}},
             {"In2D_NegStrideBeginMask", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {6, 5, 4}, {1, -1}, {2, -4}, {1, -1}},
             {"In2D_NegStrideEndMask", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {6, 5}, {1, -1}, {2, 0}, {1, -1}},
             {"In3D_Identity", {2, 3, 2}, {2, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
              {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {0, 0, 0}, {2, 3, 2}, {1, 1, 1}},
             {"In3D_NegStride", {2, 3, 2}, {2, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
              {12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, {-1, -1, -1}, {-3, -4, -3}, {-1, -1, -1}},
             {"In3D_Strided2", {2, 3, 2}, {1, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {1, 5}, {0, 0, 0},
              {2, 3, 2}, {2, 2, 2}},
             {"In1D_ShrinkAxisMask1", {4}, {1}, {1, 2, 3, 4}, {2}, {1}, {2}, {1}},
             {"In1D_ShrinkAxisMask1_NegativeSlice", {4}, {1}, {0, 1, 2, 3}, {3}, {-1}, {4}, {1}},
             {"In2D_ShrinkAxis3_NegativeSlice", {4, 1}, {1}, {0, 1, 2, 3}, {2}, {-2, -1}, {3, 1}, {1, 1}},
             {"In2D_ShrinkAxis2_BeginEndAxis1_NegativeSlice", {4, 1}, {4}, {0, 1, 2, 3}, {0, 1, 2, 3}, {0, -1},
              {4, 1}, {1, 1}},
             {"In1D_BeginMaskShrinkAxisMask1", {4}, {1}, {1, 2, 3, 4}, {1}, {0}, {1}, {1}},
             {"In2D_ShrinkAxisMask1", {2, 3}, {3}, {1, 2, 3, 4, 5, 6}, {1, 2, 3}, {0, 0}, {1, 3}, {1, 1}},
             {"In2D_ShrinkAxisMask2", {2, 3}, {2}, {1, 2, 3, 4, 5, 6}, {1, 4}, {0, 0}, {2, 1}, {1, 1}},
             {"In2D_ShrinkAxisMask3", {2, 3}, {1}, {1, 2, 3, 4, 5, 6}, {1}, {0, 0}, {1, 1}, {1, 1}},
             {"In3D_IdentityShrinkAxis1", {2, 3, 2}, {3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
              {1, 2, 3, 4, 5, 6}, {0, 0, 0}, {1, 3, 2}, {1, 1, 1}},
             {"In3D_IdentityShrinkAxis2", {2, 3, 2}, {2, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {1, 2, 7, 8},
              {0, 0, 0}, {2, 1, 2}, {1, 1, 1}},
             {"In3D_IdentityShrinkAxis3", {2, 3, 2}, {2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {1, 2},
              {0, 0, 0}, {1, 1, 2}, {1, 1, 1}},
             {"In3D_IdentityShrinkAxis4", {2, 3, 2}, {2, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
              {1, 3, 5, 7, 9, 11}, {0, 0, 0}, {2, 3, 1}, {1, 1, 1}},
             {"In3D_IdentityShrinkAxis5", {2, 3, 2}, {3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {1, 3, 5},
              {0, 0, 0}, {1, 3, 1}, {1, 1, 1}},
             {"In3D_IdentityShrinkAxis6", {2, 3, 2}, {2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {1, 7},
              {0, 0, 0}, {2, 1, 1}, {1, 1, 1}},
             {"In3D_IdentityShrinkAxis7", {2, 3, 2}, {1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {1}, {0, 0, 0},
              {1, 1, 1}, {1, 1, 1}},
             {"In3D_IdentityShrinkAxis1Uint8", {2, 3, 2}, {3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
              {1, 2, 3, 4, 5, 6}, {0, 0, 0}, {1, 3, 2}, {1, 1, 1}},
             {"In3D_IdentityShrinkAxis1int8", {2, 3, 2}, {3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
              {1, 2, 3, 4, 5, 6}, {0, 0, 0}, {1, 3, 2}, {1, 1, 1}},
             // NOTE(review): the "In5D_*" names are kept from the ported suite; the shapes here are 4-D.
             {"In5D_Identity", {2, 2, 2, 2}, {2, 1, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
              {1, 2, 3, 4, 9, 10, 11, 12}, {0, 0, 0, 0}, {2, 1, 2, 2}, {1, 1, 1, 1}},
             {"In5D_IdentityShrinkAxis1", {2, 2, 2, 2}, {1, 2, 2},
              {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {1, 2, 3, 4}, {0, 0, 0, 0}, {1, 1, 2, 2},
              {1, 1, 1, 1}},
             {"In3D_SmallBegin", {2, 3, 2}, {1, 3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {1, 2, 3, 4, 5, 6},
              {0}, {1}, {1}},
             // NOTE(review): the "Withhrink" spelling is kept verbatim from the ported test name.
             {"In3D_SmallBeginWithhrinkAxis1", {2, 3, 2}, {3, 2}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
              {1, 2, 3, 4, 5, 6}, {0}, {1}, {1}}};

  for (auto &case_ : cases) {
    auto &name = std::get<0>(case_);
    auto &input_shape = std::get<1>(case_);
    auto &output_shape = std::get<2>(case_);
    auto &input_data = std::get<3>(case_);
    auto &expect_data = std::get<4>(case_);
    auto &begin = std::get<5>(case_);
    auto &end = std::get<6>(case_);
    auto &stride = std::get<7>(case_);

    std::cout << name << std::endl;
    // A fresh parameter is built for each TestMain call (fp32 run, then fp16 run).
    auto *param = GetStridedSliceParameter(begin, end, stride);
    TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param,
             false);
    param = GetStridedSliceParameter(begin, end, stride);
    TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param,
             true);
  }
}

// NHWC {1,3,2,4}: H rows 1..2, stride 2 over W, stride 2 over C starting at 1.
TEST_F(TestStridedSliceOpenCL, tflite_opencl) {
  OpParameter *slice_param = GetStridedSliceParameter({0, 1, 0, 1}, {1, 3, 2, 4}, {1, 1, 2, 2});
  float in_data[] = {0.1f,  0.2f,  0.3f,  0.4,  1.1f,  1.2f,  1.3f,  1.4,  10.1f, 10.2f, 10.3f, 10.4,
                     11.1f, 11.2f, 11.3f, 11.4, 20.1f, 20.2f, 20.3f, 20.4, 21.1f, 21.2f, 21.3f, 21.4};
  float golden[] = {10.2, 10.4, 20.2, 20.4};
  TestMain({{{1, 3, 2, 4}, in_data, Tensor::Category::VAR}}, {{1, 2, 1, 2}, golden}, slice_param, false);
}

} // namespace mindspore

+ 84
- 2
mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc View File

@@ -15,11 +15,17 @@
*/

#include <string>
#include "src/common/log_adapter.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "common/common_test.h"
#include "src/kernel_registry.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h"

using mindspore::kernel::LiteKernel;
using mindspore::kernel::SubGraphOpenCLKernel;
using mindspore::lite::KernelRegistry;
using mindspore::lite::Tensor;
using mindspore::schema::Format::Format_NHWC;

namespace mindspore {

void LoadTestData(void *dst, size_t dst_size, const std::string &file_path) {
@@ -35,4 +41,80 @@ void LoadTestData(void *dst, size_t dst_size, const std::string &file_path) {
}
}

// Generic driver for single-OpenCL-kernel unit tests.
//
// input_infos:  one {NHWC shape, host data, category} tuple per kernel input.
//               Non-VAR (constant) tensors get their data copied in
//               immediately; VAR tensors become sub-graph inputs whose data is
//               copied just before Run().
// output_info:  {NHWC shape, expected host data}.
// op_parameter: heap-allocated parameter. On kernel-creation failure it is
//               freed here; otherwise it is handed to the created kernel.
// fp16_enable:  run the OpenCL runtime in fp16 mode.
// atol:         absolute tolerance for the output comparison.
// print_output: if true, dump "expect / output" pairs to stdout.
void TestMain(const std::vector<std::tuple<std::vector<int>, float *, Tensor::Category>> &input_infos,
              std::tuple<std::vector<int>, float *> output_info, OpParameter *op_parameter, bool fp16_enable,
              float atol, bool print_output) {
  MS_LOG(DEBUG) << "initialize OpenCLRuntime and OpenCLAllocator";
  auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper();
  auto ocl_runtime = runtime_wrapper.GetInstance();
  EXPECT_TRUE(ocl_runtime->Init() == RET_OK);
  ocl_runtime->SetFp16Enable(fp16_enable);
  auto allocator = ocl_runtime->GetAllocator();

  MS_LOG(DEBUG) << "create Tensors & init weight data";
  // `tensors` owns the input Tensor objects; the pointer containers below
  // point into it. reserve() up front so emplace_back never reallocates and
  // invalidates pointers already stored in kernel_inputs/subgraph_inputs.
  std::vector<Tensor> tensors;
  tensors.reserve(input_infos.size());
  std::vector<Tensor *> kernel_inputs;
  std::vector<Tensor *> subgraph_inputs;
  std::map<Tensor *, float *> subgraph_inputs_data;
  for (const auto &input_info : input_infos) {  // const ref: avoid copying shape/data tuples
    const std::vector<int> &shape = std::get<0>(input_info);
    auto *input_data = std::get<1>(input_info);
    const Tensor::Category category = std::get<2>(input_info);
    tensors.emplace_back(kNumberTypeFloat32, shape, Format_NHWC, category);
    auto *new_tensor = &tensors.back();
    kernel_inputs.push_back(new_tensor);
    if (category != Tensor::Category::VAR) {
      // Constant tensors: data is available now, copy it immediately.
      memcpy(new_tensor->MutableData(), input_data, new_tensor->Size());
    } else {
      // Variable tensors: defer the copy until the sub-graph is initialized.
      subgraph_inputs.push_back(new_tensor);
      subgraph_inputs_data[new_tensor] = input_data;
    }
  }
  const std::vector<int> &output_shape = std::get<0>(output_info);
  float *expect_data = std::get<1>(output_info);
  auto output = Tensor(kNumberTypeFloat32, output_shape, Format_NHWC, Tensor::Category::VAR);

  MS_LOG(DEBUG) << "create OpenCL Kernel";
  auto primitive_type = static_cast<schema::PrimitiveType>(op_parameter->type_);
  kernel::KernelKey key{kernel::kGPU, kernel_inputs.front()->data_type(), primitive_type};
  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
  if (creator == nullptr) {
    std::cerr << "get kernel registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl;
    free(op_parameter);
    FAIL();
  }
  auto *kernel = creator(kernel_inputs, {&output}, op_parameter, nullptr, key, nullptr);
  if (kernel == nullptr) {
    std::cerr << "call kernel registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl;
    free(op_parameter);
    FAIL();
  }

  MS_LOG(DEBUG) << "create SubGraph & init input data";
  std::vector<LiteKernel *> kernels{kernel};
  auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel(subgraph_inputs, {&output}, kernels, kernels, kernels);
  if (sub_graph == nullptr) {
    // Original code silently returned here: the test would "pass" while
    // leaking the kernel. Fail explicitly instead.
    std::cerr << "create SubGraphOpenCLKernel error" << std::endl;
    delete kernel;  // NOTE(review): assumes kernel releases op_parameter in its destructor — confirm
    FAIL();
  }
  for (auto input : subgraph_inputs) {
    EXPECT_TRUE(input->MallocData(allocator) == RET_OK);
  }
  EXPECT_TRUE(sub_graph->Init() == RET_OK);
  for (auto input : subgraph_inputs) {
    memcpy(input->data_c(), subgraph_inputs_data[input], input->Size());
  }

  MS_LOG(DEBUG) << "run SubGraph & compare result";
  EXPECT_TRUE(sub_graph->Run() == RET_OK);
  if (print_output) {
    for (int i = 0; i < output.ElementsNum(); ++i) {
      printf("%d: expect=%.3f output=%.3f\n", i, expect_data[i], reinterpret_cast<float *>(output.data_c())[i]);
    }
  }
  CommonTest::CompareOutputData(reinterpret_cast<float *>(output.data_c()), expect_data, output.ElementsNum(), atol);

  MS_LOG(DEBUG) << "release resources";
  delete sub_graph;
}

} // namespace mindspore

+ 12
- 6
mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h View File

@@ -14,16 +14,18 @@
* limitations under the License.
*/

#ifndef TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_
#define TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_

#include <string>
#include <iostream>
#include "tests/ut/cpp/common/common_test.h"
#include "src/common/log_adapter.h"
#include <vector>
#include <tuple>
#include <map>
#include "mindspore/lite/src/tensor.h"
#include "mindspore/lite/src/common/file_utils.h"
#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h"
#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h"

#ifndef TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_
#define TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_
using mindspore::lite::Tensor;

namespace mindspore {

@@ -63,6 +65,10 @@ void CompareOutput(lite::Tensor *output_tensor, const std::string &file_path, T
CompareOutput(output_tensor->data_c(), expect_data, output_tensor->ElementsNum(), atol, rtol);
}

void TestMain(const std::vector<std::tuple<std::vector<int>, float *, Tensor::Category>> &input_infos,
std::tuple<std::vector<int>, float *> output_info, OpParameter *op_parameter, bool fp16_enable = false,
float atol = 10e-9, bool print_output = false);

} // namespace mindspore

#endif // TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_

Loading…
Cancel
Save