@@ -1,146 +0,0 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define INT2 int2
#define INT4 int4
__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
__kernel void slice_NHWC4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape,
                          INT4 begin, INT2 sharedNoUpdiv) {
  int X = get_global_id(1);  // H
  int Y = get_global_id(2);  // W
  if (X >= out_shape.y || Y >= out_shape.z) {
    return;
  }
  FLT4 result;
  if (sharedNoUpdiv.x % 4 == 0) {
    for (int i = 0; i < out_shape.w; i++) {
      result = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (i + begin.w), (X + begin.y)));
      WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i, (X)), result);
    }
  } else {
    int begin_postion = sharedNoUpdiv.x % 4;
    FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + begin.w, (X + begin.y)));
    if (begin_postion == 1) {
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y)));
        result.x = first.y;
        result.y = first.z;
        result.z = first.w;
        result.w = second.x;
        WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result);
        first.y = second.y;
        first.z = second.z;
        first.w = second.w;
      }
    } else if (begin_postion == 2) {
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y)));
        result.x = first.z;
        result.y = first.w;
        result.z = second.x;
        result.w = second.y;
        WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result);
        first.z = second.z;
        first.w = second.w;
      }
    } else {
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y)));
        result.x = first.w;
        result.y = second.x;
        result.z = second.y;
        result.w = second.z;
        WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result);
        first.w = second.w;
      }
    }
  }
  // judge the line of size
  int size = sharedNoUpdiv.y % 4;
  FLT4 result_fill0;
  if (size == 1) {
    result_fill0.x = result.x;
    result_fill0.y = 0;
    result_fill0.z = 0;
    result_fill0.w = 0;
    WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0);
  } else if (size == 2) {
    result_fill0.x = result.x;
    result_fill0.y = result.y;
    result_fill0.z = 0;
    result_fill0.w = 0;
    WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0);
  } else if (size == 3) {
    result_fill0.x = result.x;
    result_fill0.y = result.y;
    result_fill0.z = result.z;
    result_fill0.w = 0;
    WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0);
  }
}
__kernel void slice_NC4HW4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape,
                           INT4 begin, INT2 sharedNoUpdiv) {
  int X = get_global_id(1);  // H
  int Y = get_global_id(2);  // W
  if (X >= out_shape.y || Y >= out_shape.z) {
    return;
  }
  FLT4 result;
  if (sharedNoUpdiv.x % 4 == 0) {
    for (int i = 0; i < out_shape.w; i++) {
      result = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y)));
      WRITE_IMAGE(output, (INT2)((Y), (i * out_shape.y + X)), result);
    }
  } else {
    int begin_postion = sharedNoUpdiv.x % 4;
    FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (begin.w) * input_shape.y + (X + begin.y)));
    if (begin_postion == 1) {
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y)));
        result.x = first.y;
        result.y = first.z;
        result.z = first.w;
        result.w = second.x;
        WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result);
        first.y = second.y;
        first.z = second.z;
        first.w = second.w;
      }
    } else if (begin_postion == 2) {
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y)));
        result.x = first.z;
        result.y = first.w;
        result.z = second.x;
        result.w = second.y;
        WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result);
        first.z = second.z;
        first.w = second.w;
      }
    } else {
      for (int i = 1; i <= out_shape.w; i++) {
        FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y)));
        result.x = first.w;
        result.y = second.x;
        result.z = second.y;
        result.w = second.z;
        WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result);
        first.w = second.w;
      }
    }
  }
  // judge the line of size
  int size = sharedNoUpdiv.y % 4;
  FLT4 result_fill0 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);
  if (size == 1) {
    result_fill0.x = result.x;
    WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0);
  } else if (size == 2) {
    result_fill0.x = result.x;
    result_fill0.y = result.y;
    WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0);
  } else if (size == 3) {
    result_fill0.x = result.x;
    result_fill0.y = result.y;
    result_fill0.z = result.z;
    WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0);
  }
}
@@ -0,0 +1,59 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
__kernel void strided_slice(__read_only image2d_t input, __write_only image2d_t output, int4 input_shape,
                            int4 output_shape, int2 io_slices, int4 begin, int4 stride, int4 size) {
  int IN = input_shape.x, IH = input_shape.y, IW = input_shape.z, CI = input_shape.w;
  int ON = output_shape.x, OH = output_shape.y, OW = output_shape.z, CO = output_shape.w;
  int CI_SLICES = io_slices.x, CO_SLICES = io_slices.y;
  int on_oh = get_global_id(0);
  int ow = get_global_id(1);
  int co_slice = get_global_id(2);
  int on = on_oh / OH;
  int oh = on_oh % OH;
  if (on >= ON || oh >= OH || ow >= OW || co_slice >= CO_SLICES) {
    return;
  }
  FLT tmp[4];
  for (int i = 0; i < 4; ++i) {
    // output_shape idx -> size idx, because squeeze(output_shape) == squeeze(size)
    // for example:
    // python code: B = A[1, 1:16, 2:16, 3:16]
    // input_shape  = [16, 16, 16, 16]
    // begin        = [ 1,  1,  2,  3]
    // end          = [ 2, 16, 16, 16]
    // stride       = [ 1,  1,  1,  1]
    // size         = [ 1, 15, 14, 13] = ceil((end - begin) / stride)
    // output_shape = [15, 14, 13]
    int idx = ((on * OH + oh) * OW + ow) * CO + co_slice * 4 + i;
    int co_ = idx % size.w;
    idx /= size.w;
    int ow_ = idx % size.z;
    idx /= size.z;
    int oh_ = idx % size.y;
    idx /= size.y;
    int on_ = idx;
    int in = begin.x + stride.x * on_;
    int ih = begin.y + stride.y * oh_;
    int iw = begin.z + stride.z * ow_;
    int ci = begin.w + stride.w * co_;
    FLT4 src = READ_IMAGE(input, smp_none, (int2)(iw * CI_SLICES + ci / 4, in * IH + ih));
    int offset = ci % 4;
    if (offset == 0) {
      tmp[i] = src.x;
    } else if (offset == 1) {
      tmp[i] = src.y;
    } else if (offset == 2) {
      tmp[i] = src.z;
    } else {
      tmp[i] = src.w;
    }
  }
  FLT4 out = (FLT4)(tmp[0], tmp[1], tmp[2], tmp[3]);
  WRITE_IMAGE(output, (int2)(ow * CO_SLICES + co_slice, on_oh), out);
}
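Editor's note: the index mapping documented in the kernel comments can be checked host-side. A minimal C++ sketch (the helper name `OutputIdxToInputCoord` is hypothetical, not part of this patch) mirroring the kernel's flat-index decomposition:

    #include <array>

    // Decompose a flat NHWC output index into (n, h, w, c) using `size`, then map
    // into input coordinates via `begin` and `stride` -- the same arithmetic the
    // kernel performs for each of the four channel values it gathers.
    std::array<int, 4> OutputIdxToInputCoord(int idx, const int size[4], const int begin[4], const int stride[4]) {
      int co = idx % size[3];
      idx /= size[3];
      int ow = idx % size[2];
      idx /= size[2];
      int oh = idx % size[1];
      idx /= size[1];
      int on = idx;
      return {begin[0] + stride[0] * on, begin[1] + stride[1] * oh,
              begin[2] + stride[2] * ow, begin[3] + stride[3] * co};
    }

    // For B = A[1, 1:16, 2:16, 3:16]: size = {1, 15, 14, 13}, begin = {1, 1, 2, 3},
    // stride = {1, 1, 1, 1}; flat index 0 maps to input coordinate (1, 1, 2, 3).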
@@ -1,106 +0,0 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <cstring>
#include <string>
#include <algorithm>
#include <set>
#include "src/kernel_registry.h"
#include "src/runtime/kernel/opencl/kernel/slice.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/slice.cl.inc"
using mindspore::kernel::KERNEL_ARCH::kGPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Slice;
namespace mindspore::kernel {
int SliceOpenCLKernel::Init() {
  std::set<std::string> build_options;
  std::string source = slice_source;
  std::string program_name = "slice";
  std::string kernel_name = "slice_NHWC4";
  ocl_runtime_->LoadSource(program_name, source);
  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return RET_OK;
}
void SlcieGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) {
  const int max_divider = 8;
  const int max_x = 4, max_y = 8;
  int x = std::min(GetMaxDivisorStrategy1(global[0], max_divider), max_x);
  int yz = max_size / x;
  int y = std::min(std::min(GetMaxDivisorStrategy1(global[1], max_divider), yz), max_y);
  int z = std::min(yz / y, static_cast<int>(UP_DIV(global[2], 2)));
  local->clear();
  local->push_back(x);
  local->push_back(y);
  local->push_back(z);
}
int SliceOpenCLKernel::Run() {
  MS_LOG(DEBUG) << this->name() << " Running! ";
  auto param = reinterpret_cast<SliceParameter *>(this->op_parameter_);
  auto input_shape = in_tensors_[0]->shape();
  cl_int4 input_shape_ = {input_shape[0], input_shape[1], input_shape[2], UP_DIV(input_shape[3], C4NUM)};
  cl_int4 size_ = {param->size_[0], param->size_[1], param->size_[2], UP_DIV(param->size_[3], C4NUM)};
  cl_int4 begin_ = {param->begin_[0], param->begin_[1], param->begin_[2], param->begin_[3] / 4};
  cl_int2 sharedNoUpdiv = {param->begin_[3], param->size_[3]};
  uint32_t OH = param->size_[1];
  uint32_t OW = param->size_[2];
  const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
  std::vector<size_t> local = {1, 1, 1};  // init local
  std::vector<size_t> global = {1, OH, OW};
  SlcieGetWorkGroup(global, &local, max_global[0]);
  int arg_cn = 0;
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());   // input tensor
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());  // out tensor
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_);
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, size_);
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_);
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv);
  ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
  return RET_OK;
}
kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                             const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                             const lite::InnerContext *ctx, const kernel::KernelKey &desc,
                                             const mindspore::lite::PrimitiveC *primitive) {
  auto *kernel = new (std::nothrow) SliceOpenCLKernel(opParameter, inputs, outputs);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << " new SliceOpenCLKernel failed ";
    free(opParameter);
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << " Init kernel failed, name: Slice ";
    delete kernel;
    return nullptr;
  }
  return kernel;
}
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Slice, OpenCLSliceKernelCreator);
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Slice, OpenCLSliceKernelCreator);
}  // namespace mindspore::kernel
@@ -0,0 +1,192 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <cstring>
#include <cmath>     // std::ceil, used below in InitConstArgs()
#include <deque>
#include <iterator>  // std::back_inserter, used below in the shape check
#include <string>
#include <algorithm>
#include <set>
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/kernel/opencl/kernel/strided_slice.h" | |||
| #include "src/runtime/kernel/opencl/utils.h" | |||
| #include "src/runtime/kernel/opencl/cl/strided_slice.cl.inc" | |||
| #include "nnacl/strided_slice.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kGPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Slice; | |||
| using mindspore::schema::PrimitiveType_StridedSlice; | |||
| namespace mindspore::kernel { | |||
| int SliceOpenCLKernel::CheckSpecs() { | |||
| const std::string kernel_name = op_parameter_->type_ == PrimitiveType_Slice ? "Slice" : "StridedSlice"; | |||
| if (in_tensors_.size() != 1) { | |||
| MS_LOG(ERROR) << kernel_name + " only supports 1 input Tensor."; | |||
| return RET_ERROR; | |||
| } | |||
| if (out_tensors_.size() != 1) { | |||
| MS_LOG(ERROR) << kernel_name + " only supports 1 output Tensor."; | |||
| return RET_ERROR; | |||
| } | |||
| auto in_ndim = in_tensors_.front()->shape().size(); | |||
| if (in_ndim == 0 || in_ndim > 4) { | |||
| MS_LOG(ERROR) << kernel_name + " only supports 1D-4D input tensor"; | |||
| return RET_ERROR; | |||
| } | |||
| auto out_ndim = out_tensors_.front()->shape().size(); | |||
| if (out_ndim > 4) { | |||
| MS_LOG(ERROR) << kernel_name + " only supports 0D-4D output tensor"; | |||
| return RET_ERROR; | |||
| } | |||
| if (InitConstArgs() != RET_OK) { | |||
| MS_LOG(ERROR) << "call SliceOpenCLKernel::InitConstArgs() failed"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int SliceOpenCLKernel::Prepare() { | |||
| std::set<std::string> build_options; | |||
| std::string program_name = "strided_slice"; | |||
| ocl_runtime_->LoadSource(program_name, strided_slice_source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, "strided_slice", build_options); | |||
| SetConstArgs(); | |||
| SetGlobalLocal(); | |||
| return RET_OK; | |||
| } | |||
| int SliceOpenCLKernel::InitConstArgs() { | |||
| auto input_info = Image2DInfo(in_tensors_.front()); | |||
| auto output_info = Image2DInfo(out_tensors_.front()); | |||
| input_shape_ = {static_cast<cl_int>(input_info.N), static_cast<cl_int>(input_info.H), | |||
| static_cast<cl_int>(input_info.W), static_cast<cl_int>(input_info.C)}; | |||
| output_shape_ = {static_cast<cl_int>(output_info.N), static_cast<cl_int>(output_info.H), | |||
| static_cast<cl_int>(output_info.W), static_cast<cl_int>(output_info.C)}; | |||
| io_slices_ = {static_cast<cl_int>(input_info.Slice), static_cast<cl_int>(output_info.Slice)}; | |||
| if (op_parameter_->type_ == PrimitiveType_Slice) { | |||
| auto param = reinterpret_cast<SliceParameter *>(op_parameter_); | |||
| Broadcast2GpuShape(param->begin_, begin_.s, param->param_length_, 0); | |||
| Broadcast2GpuShape(param->size_, size_.s, param->param_length_, -1); | |||
| for (int i = 0; i < 4; ++i) { | |||
| if (begin_.s[i] < 0) { | |||
| begin_.s[i] += input_shape_.s[i]; | |||
| } | |||
| if (begin_.s[i] < 0 || begin_.s[i] >= input_shape_.s[i]) { | |||
| MS_LOG(ERROR) << "Slice kernel only supports 0<=begin<input_shape but begin[i]=" << begin_.s[i] | |||
| << " input_shape[i]=" << input_shape_.s[i]; | |||
| return RET_ERROR; | |||
| } | |||
| if (size_.s[i] < -1 || size_.s[i] == 0) { | |||
| MS_LOG(ERROR) << "Slice kernel only supports size=-1 or size>0 but size[i]=" << size_.s[i]; | |||
| return RET_ERROR; | |||
| } | |||
| if (size_.s[i] == -1 || begin_.s[i] + size_.s[i] > input_shape_.s[i]) { | |||
| size_.s[i] = input_shape_.s[i] - begin_.s[i]; | |||
| } | |||
| } | |||
| } else { | |||
| auto param = reinterpret_cast<StridedSliceParameter *>(op_parameter_); | |||
| cl_int4 end = input_shape_; | |||
| Broadcast2GpuShape(param->begins_, begin_.s, param->num_axes_, 0); | |||
| Broadcast2GpuShape(param->strides_, stride_.s, param->num_axes_, 1); | |||
| Broadcast2GpuShape(param->ends_, end.s, param->num_axes_); | |||
| for (int i = 0; i < 4; ++i) { | |||
| // begin is negative | |||
| if (begin_.s[i] < 0) { | |||
| begin_.s[i] += input_shape_.s[i]; | |||
| } | |||
| // avoid begin is out of range | |||
| begin_.s[i] = std::clamp(begin_.s[i], 0, input_shape_.s[i] - 1); | |||
| // end is negative | |||
| if (end.s[i] < 0) { | |||
| end.s[i] += input_shape_.s[i]; | |||
| } | |||
| // avoid end is out of range | |||
| end.s[i] = std::clamp(end.s[i], -1, input_shape_.s[i]); | |||
| // check stride begin end | |||
| if (stride_.s[i] > 0) { | |||
| if (begin_.s[i] >= end.s[i]) { | |||
| MS_LOG(ERROR) << "StridedSlice kernel only supports begin_<end when stride>0"; | |||
| return RET_ERROR; | |||
| } | |||
| } else if (stride_.s[i] < 0) { | |||
| if (begin_.s[i] <= end.s[i]) { | |||
| MS_LOG(ERROR) << "StridedSlice kernel only supports begin_>end when stride<0"; | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "StridedSlice kernel only supports stride!=0"; | |||
| return RET_ERROR; | |||
| } | |||
| size_.s[i] = std::ceil(static_cast<float>(end.s[i] - begin_.s[i]) / static_cast<float>(stride_.s[i])); | |||
| } | |||
| } | |||
| // check size | |||
| std::vector<int> shape_not_1; | |||
| std::vector<int> size_not_1; | |||
  // append through back_inserter: shape_not_1/size_not_1 start empty, so writing
  // through their begin() iterators would be undefined behavior
  auto out_shape = out_tensors_.front()->shape();
  std::copy_if(out_shape.begin(), out_shape.end(), std::back_inserter(shape_not_1), [](int x) { return x > 1; });
  std::copy_if(size_.s, size_.s + 4, std::back_inserter(size_not_1), [](int x) { return x > 1; });
  if (shape_not_1 != size_not_1) {
    MS_LOG(ERROR) << "Slice/StridedSlice kernel output shape infer error";
    return RET_ERROR;
  }
  return RET_OK;
}
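Editor's note: per axis, the normalization above reduces to the following self-contained C++ sketch (same formulas as InitConstArgs(); the helper name `AxisSize` is illustrative and the error paths are omitted):

    #include <algorithm>
    #include <cmath>

    // Normalize one axis of a strided slice: wrap negative indices, clamp to the
    // valid range, then size = ceil((end - begin) / stride).
    int AxisSize(int begin, int end, int stride, int dim) {
      if (begin < 0) begin += dim;
      begin = std::clamp(begin, 0, dim - 1);
      if (end < 0) end += dim;
      end = std::clamp(end, -1, dim);
      return static_cast<int>(std::ceil(static_cast<float>(end - begin) / static_cast<float>(stride)));
    }

    // e.g. the In1D_OutOfRangeBeginNegativeStride test below: AxisSize(5, 0, -1, 4)
    // clamps begin to 3, keeps end at 0, and yields ceil(-3 / -1) = 3 elements.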
void SliceOpenCLKernel::SetConstArgs() {
  int arg_cn = 2;
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_);
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_);
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_);
  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_);
  ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_);
}
void SliceOpenCLKernel::SetGlobalLocal() {
  auto output_info = Image2DInfo(out_tensors_.front());
  std::vector<size_t> global = {output_info.N * output_info.H, output_info.W, output_info.Slice};
  const int max_divider = 8;
  auto max_work_group_size = ocl_runtime_->DeviceMaxWorkGroupSize();
  size_t local_c = GetMaxDivisorStrategy0(global[2], max_divider);
  size_t local_hw = max_work_group_size / local_c;
  size_t local_h = std::min(UP_DIV(global[0], 2), local_hw);
  size_t local_w = std::min(local_hw / local_h, global[1]);
  std::vector<size_t> local = {local_h, local_w, local_c};
  AlignGlobalLocal(global, local);
}
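Editor's note: AlignGlobalLocal is assumed here to round each global dimension up to a multiple of the chosen local size so the NDRange divides evenly; the padded work-items fall through the kernel's bounds check. A sketch of that rounding, under this assumption:

    #include <cstddef>

    // Round global up to a multiple of local; surplus work-items exit early in the
    // kernel via its `if (... >= ...) return;` guard.
    size_t AlignUp(size_t global, size_t local) { return (global + local - 1) / local * local; }

    // e.g. AlignUp(19, 4) == 20, so one extra work-item per row is launched and discarded.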
int SliceOpenCLKernel::Run() {
  MS_LOG(DEBUG) << this->name() << " Running! ";
  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c());
  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c());
  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr);
  return RET_OK;
}
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Slice, OpenCLKernelCreator<SliceOpenCLKernel>);
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Slice, OpenCLKernelCreator<SliceOpenCLKernel>);
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, OpenCLKernelCreator<SliceOpenCLKernel>);
REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_StridedSlice, OpenCLKernelCreator<SliceOpenCLKernel>);
}  // namespace mindspore::kernel
@@ -14,8 +14,8 @@
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SLICE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SLICE_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_STRIDED_SLICE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_STRIDED_SLICE_H_
#include <vector>
#include "src/runtime/kernel/opencl/opencl_kernel.h"
@@ -31,12 +31,23 @@ class SliceOpenCLKernel : public OpenCLKernel {
  ~SliceOpenCLKernel() override = default;
  int Init() override;
  int Prepare() override;
  int Run() override;
  int CheckSpecs() override;
  void SetConstArgs() override;
  void SetGlobalLocal() override;
 private:
  int InitConstArgs();
  cl::Kernel kernel_;
  cl_int4 input_shape_{};
  cl_int4 output_shape_{};
  cl_int2 io_slices_{};
  cl_int4 begin_{};
  cl_int4 stride_{{1, 1, 1, 1}};
  cl_int4 size_{};
};
}  // namespace mindspore::kernel
@@ -34,33 +34,67 @@ struct OpenCLToFormatParameter {
  lite::opencl::MemType out_mem_type{lite::opencl::MemType::IMG};
};
template <typename SrcT, typename DstT>
void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num) {
  auto *N = dst;
  auto *H = dst + 1;
  auto *W = dst + 2;
  auto *C = dst + 3;
  if (src_num == 1) {
    *N = src[0];
  } else if (src_num == 2) {
    *N = src[0];
    *C = src[1];
  } else if (src_num == 3) {
    *N = src[0];
    *W = src[1];
    *C = src[2];
  } else if (src_num == 4) {
    *N = src[0];
    *H = src[1];
    *W = src[2];
    *C = src[3];
  } else if (src_num >= 5) {
    MS_LOG(ERROR) << "GPU doesn't support ndim>=" << src_num;
  }
}
template <typename SrcT, typename DstT>
void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num, DstT default_value) {
  for (int i = 0; i < 4; ++i) {
    dst[i] = default_value;
  }
  Broadcast2GpuShape(src, dst, src_num);
}
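Editor's note: a usage sketch of the overload above (values chosen for illustration only):

    // Broadcast2GpuShape maps a low-rank shape into fixed NHWC slots:
    // 1D -> N;  2D -> N, C;  3D -> N, W, C;  4D -> N, H, W, C.
    //   int src[3] = {2, 5, 8};              // a 3D shape
    //   int dst[4];
    //   Broadcast2GpuShape(src, dst, 3, 1);  // unfilled slots take the default 1
    //   // dst == {2, 1, 5, 8}, i.e. N = 2, H = 1, W = 5, C = 8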
struct Image2DInfo {
  explicit Image2DInfo(const lite::Tensor *tensor) {
    if (tensor == nullptr) {
      return;
    }
    auto shape = tensor->shape();
    if (shape.size() == 1) {
    auto ndim = shape.size();
    if (ndim == 1) {
      N = shape[0];
    } else if (shape.size() == 2) {
    } else if (ndim == 2) {
      N = shape[0];
      C = shape[1];
    } else if (shape.size() == 3) {
    } else if (ndim == 3) {
      N = shape[0];
      W = shape[1];
      C = shape[2];
    } else if (shape.size() == 4) {
    } else if (ndim == 4) {
      N = shape[0];
      H = shape[1];
      W = shape[2];
      C = shape[3];
    } else if (shape.size() >= 5) {
      MS_LOG(ERROR) << "GPU dont't support Tensor with dim=" << shape.size();
    } else if (ndim >= 5) {
      MS_LOG(ERROR) << "GPU doesn't support Tensor with ndim>=" << ndim;
    }
    Slice = UP_DIV(C, C4NUM);
    FLT_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half) : sizeof(cl_float);
    FLT4_size = FLT_size * 4;
    Slice = UP_DIV(C, C4NUM);
    if (W * Slice <= MAX_IMAGE2D_SIZE) {
      height = N * H;
      width = W * Slice;
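Editor's note: a worked example of the layout arithmetic above, using the {1, 19, 19, 96} tensor from the tests below:

    // For an NHWC tensor {1, 19, 19, 96} mapped onto an image2d:
    //   Slice  = UP_DIV(96, C4NUM) = 24      // channels packed four per texel
    //   width  = W * Slice = 19 * 24 = 456   // the W * Slice <= MAX_IMAGE2D_SIZE branch
    //   height = N * H = 1 * 19 = 19
    // Each texel holds one FLT4, so FLT4_size = 4 * FLT_size bytes.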
| @@ -13,357 +13,151 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include "src/common/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | |||
| #include "mindspore/lite/src/common/file_utils.h" | |||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||
| #include "mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h" | |||
| #include "nnacl/slice_parameter.h" | |||
| #include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" | |||
| namespace mindspore { | |||
| class TestSliceOpenCLfp32 : public mindspore::CommonTest { | |||
| public: | |||
| TestSliceOpenCLfp32() {} | |||
| }; | |||
| class TestSliceOpenCLfp16 : public mindspore::CommonTest { | |||
| public: | |||
| TestSliceOpenCLfp16() {} | |||
| }; | |||
| template <typename T> | |||
| void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bound) { | |||
| for (size_t i = 0; i < size; i++) { | |||
| T abs = fabs(output_data[i] - correct_data[i]); | |||
| ASSERT_LE(abs, err_bound); | |||
| } | |||
| } | |||
| TEST_F(TestSliceOpenCLfp32, Slicefp32CI) { | |||
| MS_LOG(INFO) << " begin test "; | |||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||
| ocl_runtime->Init(); | |||
| auto allocator = ocl_runtime->GetAllocator(); | |||
| MS_LOG(INFO) << " Read tensors from .bin "; | |||
| std::vector<int> input_shape = {1, 2, 2, 8}; | |||
| std::vector<int> output_shape = {1, 2, 2, 5}; | |||
| std::vector<int> begin = {0, 0, 0, 2}; | |||
| std::vector<int> size = {1, 2, 2, 5}; | |||
| auto data_type = kNumberTypeFloat32; | |||
| auto tensor_type = lite::Tensor::CONST_TENSOR; | |||
| float input_data[] = {-0.45816937, 0.92391545, -0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, | |||
| 0.09470133, 0.19801073, 0.04927751, -1.2808367, 0.1470597, 0.03393711, -0.33282498, | |||
| -1.0433807, -1.3678077, -0.6423931, 0.5584889, 0.28965706, 0.5343769, 0.75480366, | |||
| -1.9328151, -0.48714373, 1.711132, -1.8871949, -0.2987629, -0.14000037, -0.080552, | |||
| 0.95056856, -0.06886655, 0.5316237, 0.05787678}; | |||
| float correct_data[] = {-0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, -1.2808367, 0.1470597, | |||
| 0.03393711, -0.33282498, -1.0433807, 0.28965706, 0.5343769, 0.75480366, -1.9328151, | |||
| -0.48714373, -0.14000037, -0.080552, 0.95056856, -0.06886655, 0.5316237}; | |||
| MS_LOG(INFO) << " construct tensors "; | |||
| lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); | |||
| if (tensor_data == nullptr) { | |||
| MS_LOG(INFO) << " init tensor failed "; | |||
| return; | |||
| } | |||
| auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); | |||
| if (output_tensor == nullptr) { | |||
| delete tensor_data; | |||
| MS_LOG(INFO) << " init tensor failed "; | |||
| return; | |||
| } | |||
| std::vector<lite::Tensor *> inputs = {tensor_data}; | |||
| std::vector<lite::Tensor *> outputs = {output_tensor}; | |||
| class TestSliceOpenCL : public mindspore::CommonTest {}; | |||
| MS_LOG(INFO) << "setting SliceParameter "; | |||
| auto param = reinterpret_cast<SliceParameter *>(malloc(sizeof(SliceParameter))); | |||
| OpParameter *GetSliceParameter(const std::vector<int> &begin, const std::vector<int> &size) { | |||
| auto param = static_cast<SliceParameter *>(malloc(sizeof(SliceParameter))); | |||
| if (param == nullptr) { | |||
| for (auto tensor : inputs) { | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| delete tensor; | |||
| } | |||
| MS_LOG(INFO) << "new SliceParameter failed "; | |||
| return; | |||
| MS_LOG(ERROR) << "SliceParameter create error."; | |||
| return nullptr; | |||
| } | |||
| for (int i = 0; i < input_shape.size(); i++) { | |||
| param->op_parameter_.type_ = schema::PrimitiveType_Slice; | |||
| param->param_length_ = begin.size(); | |||
| for (int i = 0; i < begin.size(); ++i) { | |||
| param->begin_[i] = begin[i]; | |||
| param->size_[i] = size[i]; | |||
| } | |||
| auto *slice_kernel = | |||
| new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||
| if (slice_kernel == nullptr) { | |||
| for (auto tensor : inputs) { | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| delete tensor; | |||
| } | |||
| delete param; | |||
| MS_LOG(INFO) << "new kernel::slice_kernel failed "; | |||
| return; | |||
| } | |||
| slice_kernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| for (auto &input_tensor : inputs) { | |||
| input_tensor->MallocData(allocator); | |||
| } | |||
| MS_LOG(INFO) << " initialize sub_graph "; | |||
| std::vector<kernel::LiteKernel *> kernels{slice_kernel}; | |||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||
| if (sub_graph == nullptr) { | |||
| for (auto tensor : inputs) { | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| delete tensor; | |||
| } | |||
| delete param; | |||
| delete slice_kernel; | |||
| MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; | |||
| return; | |||
| } | |||
| sub_graph->Init(); | |||
| MS_LOG(INFO) << " init tensors "; | |||
| memcpy(inputs[0]->data_c(), input_data, sizeof(input_data)); | |||
| std::cout << "==================output data================" << std::endl; | |||
| sub_graph->Run(); | |||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | |||
| CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | |||
| for (auto tensor : inputs) { | |||
| tensor->set_data(nullptr); | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| tensor->set_data(nullptr); | |||
| delete tensor; | |||
| } | |||
| delete sub_graph; | |||
| return reinterpret_cast<OpParameter *>(param); | |||
| } | |||
| TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { | |||
| MS_LOG(INFO) << " begin test "; | |||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||
| ocl_runtime->Init(); | |||
| auto allocator = ocl_runtime->GetAllocator(); | |||
| MS_LOG(INFO) << " Read tensors from .bin "; | |||
| std::vector<int> input_shape = {1, 19, 19, 96}; | |||
| std::vector<int> output_shape = {1, 10, 10, 13}; | |||
| std::vector<int> begin = {0, 2, 3, 4}; | |||
| std::vector<int> size = {1, 10, 10, 13}; | |||
| auto data_type = kNumberTypeFloat32; | |||
| auto tensor_type = lite::Tensor::CONST_TENSOR; | |||
| // get the input from .bin | |||
| size_t input_size, output_size; | |||
| std::string input_path = "./test_data/in_slicefp32.bin"; | |||
| std::string output_path = "./test_data/out_slicefp32.bin"; | |||
| auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto correct_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); | |||
| MS_LOG(INFO) << " construct tensors "; | |||
| lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); | |||
| if (tensor_data == nullptr) { | |||
| MS_LOG(INFO) << " init tensor failed "; | |||
| return; | |||
| } | |||
| auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); | |||
| if (output_tensor == nullptr) { | |||
| delete tensor_data; | |||
| MS_LOG(INFO) << " init tensor failed "; | |||
| return; | |||
| } | |||
| std::vector<lite::Tensor *> inputs = {tensor_data}; | |||
| std::vector<lite::Tensor *> outputs = {output_tensor}; | |||
| MS_LOG(INFO) << "setting SliceParameter "; | |||
| auto param = reinterpret_cast<SliceParameter *>(malloc(sizeof(SliceParameter))); | |||
| if (param == nullptr) { | |||
| for (auto tensor : inputs) { | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| delete tensor; | |||
| } | |||
| MS_LOG(INFO) << "new SliceParameter failed "; | |||
| return; | |||
| } | |||
| for (int i = 0; i < input_shape.size(); i++) { | |||
| param->begin_[i] = begin[i]; | |||
| param->size_[i] = size[i]; | |||
| } | |||
| auto *slice_kernel = | |||
| new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||
| if (slice_kernel == nullptr) { | |||
| for (auto tensor : inputs) { | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| delete tensor; | |||
| } | |||
| delete param; | |||
| MS_LOG(INFO) << "new kernel::slice_kernel failed "; | |||
| return; | |||
| } | |||
| slice_kernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| for (auto &input_tensor : inputs) { | |||
| input_tensor->MallocData(allocator); | |||
| } | |||
| MS_LOG(INFO) << " initialize sub_graph "; | |||
| std::vector<kernel::LiteKernel *> kernels{slice_kernel}; | |||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||
| if (sub_graph == nullptr) { | |||
| for (auto tensor : inputs) { | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| delete tensor; | |||
| } | |||
| delete param; | |||
| delete slice_kernel; | |||
| MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; | |||
| return; | |||
| } | |||
| sub_graph->Init(); | |||
| MS_LOG(INFO) << " init tensors "; | |||
| memcpy(inputs[0]->data_c(), input_data, input_size); | |||
| std::cout << "==================output data================" << std::endl; | |||
| sub_graph->Run(); | |||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | |||
| CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | |||
| for (auto tensor : inputs) { | |||
| tensor->set_data(nullptr); | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| tensor->set_data(nullptr); | |||
| delete tensor; | |||
| } | |||
| delete sub_graph; | |||
| TEST_F(TestSliceOpenCL, 4D) { | |||
| float input_data[] = {-0.45816937, 0.92391545, -0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, | |||
| 0.09470133, 0.19801073, 0.04927751, -1.2808367, 0.1470597, 0.03393711, -0.33282498, | |||
| -1.0433807, -1.3678077, -0.6423931, 0.5584889, 0.28965706, 0.5343769, 0.75480366, | |||
| -1.9328151, -0.48714373, 1.711132, -1.8871949, -0.2987629, -0.14000037, -0.080552, | |||
| 0.95056856, -0.06886655, 0.5316237, 0.05787678}; | |||
| float expect_data[] = {-0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, -1.2808367, 0.1470597, | |||
| 0.03393711, -0.33282498, -1.0433807, 0.28965706, 0.5343769, 0.75480366, -1.9328151, | |||
| -0.48714373, -0.14000037, -0.080552, 0.95056856, -0.06886655, 0.5316237}; | |||
| auto param = GetSliceParameter({0, 0, 0, 2}, {1, 2, 2, 5}); | |||
| TestMain({{{1, 2, 2, 8}, input_data, Tensor::Category::VAR}}, {{1, 2, 2, 5}, expect_data}, param, false); | |||
| } | |||
| TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { | |||
| MS_LOG(INFO) << " begin test "; | |||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||
| ocl_runtime->SetFp16Enable(true); | |||
| ocl_runtime->Init(); | |||
| auto allocator = ocl_runtime->GetAllocator(); | |||
| MS_LOG(INFO) << " Read tensors from .bin "; | |||
| std::vector<int> input_shape = {1, 25, 25, 48}; | |||
| std::vector<int> output_shape = {1, 24, 24, 15}; | |||
| std::vector<int> begin = {0, 1, 1, 7}; | |||
| std::vector<int> size = {1, 24, 24, 15}; | |||
| auto data_type = kNumberTypeFloat16; | |||
| auto tensor_type = lite::Tensor::CONST_TENSOR; | |||
| // get the input from .bin | |||
| size_t input_size, output_size; | |||
| std::string input_path = "./test_data/in_slicefp16.bin"; | |||
| std::string output_path = "./test_data/out_slicefp16.bin"; | |||
| auto input_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); | |||
| auto correct_data = reinterpret_cast<float16_t *>(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); | |||
| MS_LOG(INFO) << " construct tensors "; | |||
| lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); | |||
| if (tensor_data == nullptr) { | |||
| MS_LOG(INFO) << " init tensor failed "; | |||
| return; | |||
| } | |||
| auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); | |||
| if (output_tensor == nullptr) { | |||
| delete tensor_data; | |||
| MS_LOG(INFO) << " init tensor failed "; | |||
| return; | |||
| TEST_F(TestSliceOpenCL, tflite_cpu) { | |||
| std::vector<std::tuple<std::string, std::vector<int>, std::vector<int>, std::vector<float>, std::vector<float>, | |||
| std::vector<int>, std::vector<int>>> | |||
| cases = {{"In1D", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {1}, {2}}, | |||
| {"In2D", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {4, 5}, {1, 0}, {1, 2}}, | |||
| {"In3D", | |||
| {2, 3, 2}, | |||
| {2, 3, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {0, 0, 0}, | |||
| {2, 3, 2}}, | |||
| {"InputFloat", {4, 1, 1, 1}, {3, 1, 1, 1}, {1, 2, 3, 4}, {2, 3, 4}, {1, 0, 0, 0}, {3, 1, 1, 1}}, | |||
| {"IndexInt64", {4, 1, 1, 1}, {3, 1, 1, 1}, {1, 2, 3, 4}, {2, 3, 4}, {1, 0, 0, 0}, {3, 1, 1, 1}}, | |||
| {"InputInteger1", | |||
| {3, 2, 3, 1}, | |||
| {1, 1, 3, 1}, | |||
| {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, | |||
| {3, 3, 3}, | |||
| {1, 0, 0, 0}, | |||
| {1, 1, 3, 1}}, | |||
| {"InputInteger2", | |||
| {3, 2, 3, 1}, | |||
| {1, 2, 3, 1}, | |||
| {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, | |||
| {3, 3, 3, 4, 4, 4}, | |||
| {1, 0, 0, 0}, | |||
| {1, 2, 3, 1}}, | |||
| {"InputInteger3", | |||
| {3, 2, 3, 1}, | |||
| {2, 1, 3, 1}, | |||
| {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, | |||
| {3, 3, 3, 5, 5, 5}, | |||
| {1, 0, 0, 0}, | |||
| {2, 1, 3, 1}}, | |||
| {"SizeMinus1", | |||
| {3, 2, 3, 1}, | |||
| {2, 1, 3, 1}, | |||
| {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, | |||
| {3, 3, 3, 5, 5, 5}, | |||
| {1, 0, 0, 0}, | |||
| {2, 1, -1, 1}}, | |||
| {"BeginNonZeroSizeMinus1Axis1", | |||
| {3, 3, 2, 1}, | |||
| {2, 2, 1, 1}, | |||
| {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}, | |||
| {5, 6, 8, 9}, | |||
| {1, 1, 0, 0}, | |||
| {2, -1, 1, 1}}, | |||
| {"BeginNonZeroSizeMinus1Axis2", | |||
| {3, 2, 3, 1}, | |||
| {2, 1, 2, 1}, | |||
| {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, | |||
| {3, 3, 5, 5}, | |||
| {1, 0, 1, 0}, | |||
| {2, 1, -1, 1}}, | |||
| {"BeginNonZeroSizeMinus1Axis3", | |||
| {3, 1, 2, 3}, | |||
| {2, 1, 1, 2}, | |||
| {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, | |||
| {3, 3, 5, 5}, | |||
| {1, 0, 0, 1}, | |||
| {2, 1, 1, -1}}, | |||
| {"SliceUint8", | |||
| {3, 2, 3, 1}, | |||
| {2, 1, 3, 1}, | |||
| {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, | |||
| {3, 3, 3, 5, 5, 5}, | |||
| {1, 0, 0, 0}, | |||
| {2, 1, -1, 1}}, | |||
| {"SliceInt8", | |||
| {3, 2, 3, 1}, | |||
| {2, 1, 3, 1}, | |||
| {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, | |||
| {3, 3, 3, 5, 5, 5}, | |||
| {1, 0, 0, 0}, | |||
| {2, 1, -1, 1}}, | |||
| {"SliceInt16", | |||
| {3, 2, 3, 1}, | |||
| {2, 1, 3, 1}, | |||
| {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, | |||
| {3, 3, 3, 5, 5, 5}, | |||
| {1, 0, 0, 0}, | |||
| {2, 1, -1, 1}}, | |||
| {"SliceInt64", | |||
| {3, 2, 3, 1}, | |||
| {2, 1, 3, 1}, | |||
| {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, | |||
| {3, 3, 3, 5, 5, 5}, | |||
| {1, 0, 0, 0}, | |||
| {2, 1, -1, 1}}}; | |||
| for (auto &case_ : cases) { | |||
| auto &name = std::get<0>(case_); | |||
| auto &input_shape = std::get<1>(case_); | |||
| auto &output_shape = std::get<2>(case_); | |||
| auto &input_data = std::get<3>(case_); | |||
| auto &expect_data = std::get<4>(case_); | |||
| auto &begin = std::get<5>(case_); | |||
| auto &size = std::get<6>(case_); | |||
| std::cout << name << std::endl; | |||
| auto *param = GetSliceParameter(begin, size); | |||
| TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, | |||
| false); | |||
| param = GetSliceParameter(begin, size); | |||
| TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, | |||
| true); | |||
| } | |||
| std::vector<lite::Tensor *> inputs = {tensor_data}; | |||
| std::vector<lite::Tensor *> outputs = {output_tensor}; | |||
| MS_LOG(INFO) << " setting SliceParameter "; | |||
| auto param = reinterpret_cast<SliceParameter *>(malloc(sizeof(SliceParameter))); | |||
| if (param == nullptr) { | |||
| for (auto tensor : inputs) { | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| delete tensor; | |||
| } | |||
| MS_LOG(INFO) << " new SliceParameter failed "; | |||
| return; | |||
| } | |||
| for (int i = 0; i < input_shape.size(); i++) { | |||
| param->begin_[i] = begin[i]; | |||
| param->size_[i] = size[i]; | |||
| } | |||
| auto *slice_kernel = | |||
| new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||
| if (slice_kernel == nullptr) { | |||
| for (auto tensor : inputs) { | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| delete tensor; | |||
| } | |||
| delete param; | |||
| MS_LOG(INFO) << " new kernel::slice_kernel failed "; | |||
| return; | |||
| } | |||
| slice_kernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| for (auto &input_tensor : inputs) { | |||
| input_tensor->MallocData(allocator); | |||
| } | |||
| MS_LOG(INFO) << " initialize sub_graph "; | |||
| std::vector<kernel::LiteKernel *> kernels{slice_kernel}; | |||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||
| if (sub_graph == nullptr) { | |||
| for (auto tensor : inputs) { | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| delete tensor; | |||
| } | |||
| delete param; | |||
| delete slice_kernel; | |||
| MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; | |||
| return; | |||
| } | |||
| sub_graph->Init(); | |||
| MS_LOG(INFO) << " init tensors "; | |||
| memcpy(inputs[0]->data_c(), input_data, input_size); | |||
| } // namespace mindspore | |||
| std::cout << "==================output data================" << std::endl; | |||
| sub_graph->Run(); | |||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | |||
| CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | |||
| for (auto tensor : inputs) { | |||
| tensor->set_data(nullptr); | |||
| delete tensor; | |||
| } | |||
| for (auto tensor : outputs) { | |||
| tensor->set_data(nullptr); | |||
| delete tensor; | |||
| } | |||
| delete sub_graph; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,317 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/common_test.h" | |||
| #include "nnacl/strided_slice.h" | |||
| #include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" | |||
| namespace mindspore { | |||
| class TestStridedSliceOpenCL : public mindspore::CommonTest {}; | |||
| OpParameter *GetStridedSliceParameter(const std::vector<int> &begins, const std::vector<int> &ends, | |||
| const std::vector<int> &strides) { | |||
| auto param = static_cast<StridedSliceParameter *>(malloc(sizeof(StridedSliceParameter))); | |||
| if (param == nullptr) { | |||
| MS_LOG(ERROR) << "create StridedSliceParameter error."; | |||
| return nullptr; | |||
| } | |||
| param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; | |||
| param->num_axes_ = begins.size(); | |||
| for (int i = 0; i < begins.size(); ++i) { | |||
| param->begins_[i] = begins[i]; | |||
| param->ends_[i] = ends[i]; | |||
| param->strides_[i] = strides[i]; | |||
| } | |||
| return reinterpret_cast<OpParameter *>(param); | |||
| } | |||
| TEST_F(TestStridedSliceOpenCL, 1D) { | |||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, | |||
| 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; | |||
| float expect_data[] = {3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33}; | |||
| auto *param = GetStridedSliceParameter({3}, {36}, {3}); | |||
| TestMain({{{36}, input_data, Tensor::Category::VAR}}, {{11}, expect_data}, param, false); | |||
| } | |||
| TEST_F(TestStridedSliceOpenCL, 2D) { | |||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, | |||
| 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; | |||
| float expect_data[] = {11, 14}; | |||
| auto *param = GetStridedSliceParameter({1, 2}, {3, 8}, {2, 3}); | |||
| TestMain({{{4, 9}, input_data, Tensor::Category::VAR}}, {{1, 2}, expect_data}, param, false); | |||
| } | |||
| TEST_F(TestStridedSliceOpenCL, 3D) { | |||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, | |||
| 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; | |||
| float expect_data[] = {11, 14}; | |||
| auto *param = GetStridedSliceParameter({0, 1, 2}, {1, 3, 8}, {1, 2, 3}); | |||
| TestMain({{{1, 4, 9}, input_data, Tensor::Category::VAR}}, {{1, 1, 2}, expect_data}, param, false); | |||
| } | |||
| TEST_F(TestStridedSliceOpenCL, 4D) { | |||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, | |||
| 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; | |||
| float expect_data0[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, | |||
| 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; | |||
| auto *param = GetStridedSliceParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); | |||
| TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 3, 3}, expect_data0}, param, false); | |||
| param = GetStridedSliceParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); | |||
| TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 3, 3}, expect_data0}, param, true); | |||
| float expect_data1[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; | |||
| param = GetStridedSliceParameter({1, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); | |||
| TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 2, 3, 3}, expect_data1}, param, false); | |||
| float expect_data2[] = {27, 28, 29, 30, 31, 32, 33, 34, 35}; | |||
| param = GetStridedSliceParameter({1, 1, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); | |||
| TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 3, 3}, expect_data2}, param, false); | |||
| float expect_data3[] = {33, 34, 35}; | |||
| param = GetStridedSliceParameter({1, 1, 2, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); | |||
| TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 1, 3}, expect_data3}, param, false); | |||
| float expect_data4[] = {34}; | |||
| param = GetStridedSliceParameter({1, 1, 2, 1}, {2, 2, 3, 2}, {1, 1, 1, 1}); | |||
| TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 1, 1}, expect_data4}, param, false); | |||
| } | |||
| TEST_F(TestStridedSliceOpenCL, 4D_stride2) { | |||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, | |||
| 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; | |||
| float expect_data[] = {13, 14, 31, 32}; | |||
| auto *param = GetStridedSliceParameter({0, 1, 1, 1}, {1, 4, 3, 3}, {2, 2, 2, 1}); | |||
| TestMain({{{1, 4, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 2, 1, 2}, expect_data}, param, false); | |||
| } | |||
| TEST_F(TestStridedSliceOpenCL, 4D_to_3D) { | |||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, | |||
| 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; | |||
| float expect_data[] = {18, 20, 21, 23, 27, 29, 30, 32}; | |||
| auto *param = GetStridedSliceParameter({1, 0, 0, 0}, {2, 2, 2, 3}, {1, 1, 1, 2}); | |||
| TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 2}, expect_data}, param, false); | |||
| } | |||
| TEST_F(TestStridedSliceOpenCL, In1D_OutOfRangeBeginNegativeStride) { | |||
| float input_data[] = {1, 2, 3, 4}; | |||
| float expect_data[] = {4, 3, 2}; | |||
| auto *param = GetStridedSliceParameter({5}, {0}, {-1}); | |||
| TestMain({{{4}, input_data, Tensor::Category::VAR}}, {{3}, expect_data}, param, false); | |||
| } | |||
| TEST_F(TestStridedSliceOpenCL, tflite_cpu) { | |||
| std::vector<float> values(32768); | |||
| for (int i = 0; i < values.size(); ++i) { | |||
| values[i] = i % 1000; | |||
| } | |||
| std::vector<std::tuple<std::string, std::vector<int>, std::vector<int>, std::vector<float>, std::vector<float>, | |||
| std::vector<int>, std::vector<int>, std::vector<int>>> | |||
| cases = {{"In1D", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {1}, {3}, {1}}, | |||
| {"In1D_Int32End", {32768}, {32768}, values, values, {0}, {32768}, {1}}, | |||
| {"In1D_NegativeBegin", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {-3}, {3}, {1}}, | |||
| {"In1D_OutOfRangeBegin", {4}, {3}, {1, 2, 3, 4}, {1, 2, 3}, {-5}, {3}, {1}}, | |||
| {"In1D_NegativeEnd", {4}, {1}, {1, 2, 3, 4}, {2}, {1}, {-2}, {1}}, | |||
| {"In1D_OutOfRangeEnd", {4}, {3}, {1, 2, 3, 4}, {2, 3, 4}, {-3}, {5}, {1}}, | |||
| {"In1D_NegativeBeginNegativeStride", {4}, {1}, {1, 2, 3, 4}, {3}, {-2}, {-3}, {-1}}, | |||
| {"In1D_OutOfRangeBeginNegativeStride", {4}, {1}, {1, 2, 3, 4}, {4}, {5}, {2}, {-1}}, | |||
| {"In1D_NegativeEndNegativeStride", {4}, {2}, {1, 2, 3, 4}, {3, 2}, {2}, {-4}, {-1}}, | |||
| {"In1D_OutOfRangeEndNegativeStride", {4}, {2}, {1, 2, 3, 4}, {2, 1}, {-3}, {-5}, {-1}}, | |||
| {"In1D_NegStride", {3}, {3}, {1, 2, 3}, {3, 2, 1}, {-1}, {-4}, {-1}}, | |||
| {"In1D_EvenLenStride2", {2}, {1}, {1, 2}, {1}, {0}, {2}, {2}}, | |||
| {"In1D_OddLenStride2", {3}, {2}, {1, 2, 3}, {1, 3}, {0}, {3}, {2}}, | |||
| {"In2D_Identity", {2, 3}, {2, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {0, 0}, {2, 3}, {1, 1}}, | |||
| {"In2D", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {4, 5}, {1, 0}, {2, 2}, {1, 1}}, | |||
| {"In2D_Stride2", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {1, 3}, {0, 0}, {2, 3}, {2, 2}}, | |||
| {"In2D_NegStride", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {6, 5, 4}, {1, -1}, {2, -4}, {2, -1}}, | |||
| {"In2D_BeginMask", {2, 3}, {2, 2}, {1, 2, 3, 4, 5, 6}, {1, 2, 4, 5}, {0, 0}, {2, 2}, {1, 1}}, | |||
| {"In2D_EndMask", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {4, 5, 6}, {1, 0}, {2, 3}, {1, 1}}, | |||
| {"In2D_NegStrideBeginMask", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {6, 5, 4}, {1, -1}, {2, -4}, {1, -1}}, | |||
| {"In2D_NegStrideEndMask", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {6, 5}, {1, -1}, {2, 0}, {1, -1}}, | |||
| {"In3D_Identity", | |||
| {2, 3, 2}, | |||
| {2, 3, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {0, 0, 0}, | |||
| {2, 3, 2}, | |||
| {1, 1, 1}}, | |||
| {"In3D_NegStride", | |||
| {2, 3, 2}, | |||
| {2, 3, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, | |||
| {-1, -1, -1}, | |||
| {-3, -4, -3}, | |||
| {-1, -1, -1}}, | |||
| {"In3D_Strided2", | |||
| {2, 3, 2}, | |||
| {1, 2, 1}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 5}, | |||
| {0, 0, 0}, | |||
| {2, 3, 2}, | |||
| {2, 2, 2}}, | |||
| {"In1D_ShrinkAxisMask1", {4}, {1}, {1, 2, 3, 4}, {2}, {1}, {2}, {1}}, | |||
| {"In1D_ShrinkAxisMask1_NegativeSlice", {4}, {1}, {0, 1, 2, 3}, {3}, {-1}, {4}, {1}}, | |||
| {"In2D_ShrinkAxis3_NegativeSlice", {4, 1}, {1}, {0, 1, 2, 3}, {2}, {-2, -1}, {3, 1}, {1, 1}}, | |||
| {"In2D_ShrinkAxis2_BeginEndAxis1_NegativeSlice", | |||
| {4, 1}, | |||
| {4}, | |||
| {0, 1, 2, 3}, | |||
| {0, 1, 2, 3}, | |||
| {0, -1}, | |||
| {4, 1}, | |||
| {1, 1}}, | |||
| {"In1D_BeginMaskShrinkAxisMask1", {4}, {1}, {1, 2, 3, 4}, {1}, {0}, {1}, {1}}, | |||
| {"In2D_ShrinkAxisMask1", {2, 3}, {3}, {1, 2, 3, 4, 5, 6}, {1, 2, 3}, {0, 0}, {1, 3}, {1, 1}}, | |||
| {"In2D_ShrinkAxisMask2", {2, 3}, {2}, {1, 2, 3, 4, 5, 6}, {1, 4}, {0, 0}, {2, 1}, {1, 1}}, | |||
| {"In2D_ShrinkAxisMask3", {2, 3}, {1}, {1, 2, 3, 4, 5, 6}, {1}, {0, 0}, {1, 1}, {1, 1}}, | |||
| {"In3D_IdentityShrinkAxis1", | |||
| {2, 3, 2}, | |||
| {3, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 2, 3, 4, 5, 6}, | |||
| {0, 0, 0}, | |||
| {1, 3, 2}, | |||
| {1, 1, 1}}, | |||
| {"In3D_IdentityShrinkAxis2", | |||
| {2, 3, 2}, | |||
| {2, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 2, 7, 8}, | |||
| {0, 0, 0}, | |||
| {2, 1, 2}, | |||
| {1, 1, 1}}, | |||
| {"In3D_IdentityShrinkAxis3", | |||
| {2, 3, 2}, | |||
| {2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 2}, | |||
| {0, 0, 0}, | |||
| {1, 1, 2}, | |||
| {1, 1, 1}}, | |||
| {"In3D_IdentityShrinkAxis4", | |||
| {2, 3, 2}, | |||
| {2, 3}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 3, 5, 7, 9, 11}, | |||
| {0, 0, 0}, | |||
| {2, 3, 1}, | |||
| {1, 1, 1}}, | |||
| {"In3D_IdentityShrinkAxis5", | |||
| {2, 3, 2}, | |||
| {3}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 3, 5}, | |||
| {0, 0, 0}, | |||
| {1, 3, 1}, | |||
| {1, 1, 1}}, | |||
| {"In3D_IdentityShrinkAxis6", | |||
| {2, 3, 2}, | |||
| {2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 7}, | |||
| {0, 0, 0}, | |||
| {2, 1, 1}, | |||
| {1, 1, 1}}, | |||
| {"In3D_IdentityShrinkAxis7", | |||
| {2, 3, 2}, | |||
| {1}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1}, | |||
| {0, 0, 0}, | |||
| {1, 1, 1}, | |||
| {1, 1, 1}}, | |||
| {"In3D_IdentityShrinkAxis1Uint8", | |||
| {2, 3, 2}, | |||
| {3, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 2, 3, 4, 5, 6}, | |||
| {0, 0, 0}, | |||
| {1, 3, 2}, | |||
| {1, 1, 1}}, | |||
| {"In3D_IdentityShrinkAxis1int8", | |||
| {2, 3, 2}, | |||
| {3, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 2, 3, 4, 5, 6}, | |||
| {0, 0, 0}, | |||
| {1, 3, 2}, | |||
| {1, 1, 1}}, | |||
| {"In5D_Identity", | |||
| {2, 2, 2, 2}, | |||
| {2, 1, 2, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, | |||
| {1, 2, 3, 4, 9, 10, 11, 12}, | |||
| {0, 0, 0, 0}, | |||
| {2, 1, 2, 2}, | |||
| {1, 1, 1, 1}}, | |||
| {"In5D_IdentityShrinkAxis1", | |||
| {2, 2, 2, 2}, | |||
| {1, 2, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, | |||
| {1, 2, 3, 4}, | |||
| {0, 0, 0, 0}, | |||
| {1, 1, 2, 2}, | |||
| {1, 1, 1, 1}}, | |||
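| // SmallBegin: begin/end/stride shorter than the input rank apply to the leading dims only | |||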
| {"In3D_SmallBegin", | |||
| {2, 3, 2}, | |||
| {1, 3, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 2, 3, 4, 5, 6}, | |||
| {0}, | |||
| {1}, | |||
| {1}}, | |||
| {"In3D_SmallBeginWithhrinkAxis1", | |||
| {2, 3, 2}, | |||
| {3, 2}, | |||
| {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, | |||
| {1, 2, 3, 4, 5, 6}, | |||
| {0}, | |||
| {1}, | |||
| {1}}}; | |||
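| // case tuple layout: {name, input_shape, output_shape, input_data, expect_data, begin, end, stride} | |||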
| for (auto &case_ : cases) { | |||
| auto &name = std::get<0>(case_); | |||
| auto &input_shape = std::get<1>(case_); | |||
| auto &output_shape = std::get<2>(case_); | |||
| auto &input_data = std::get<3>(case_); | |||
| auto &expect_data = std::get<4>(case_); | |||
| auto &begin = std::get<5>(case_); | |||
| auto &end = std::get<6>(case_); | |||
| auto &stride = std::get<7>(case_); | |||
| std::cout << name << std::endl; | |||
| auto *param = GetStridedSliceParameter(begin, end, stride); | |||
| TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, | |||
| false); | |||
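| // the first TestMain consumed param, so build a fresh parameter for the fp16 pass | |||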
| param = GetStridedSliceParameter(begin, end, stride); | |||
| TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, | |||
| true); | |||
| } | |||
| } | |||
| TEST_F(TestStridedSliceOpenCL, tflite_opencl) { | |||
| float input_data[] = {0.1f, 0.2f, 0.3f, 0.4f, 1.1f, 1.2f, 1.3f, 1.4f, 10.1f, 10.2f, 10.3f, 10.4f, | |||
| 11.1f, 11.2f, 11.3f, 11.4f, 20.1f, 20.2f, 20.3f, 20.4f, 21.1f, 21.2f, 21.3f, 21.4f}; | |||
| float expect_data[] = {10.2f, 10.4f, 20.2f, 20.4f}; | |||
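| // slicing NHWC {1,3,2,4} with begin={0,1,0,1}, end={1,3,2,4}, stride={1,1,2,2} keeps h={1,2}, w=0, c={1,3} | |||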
| auto *param = GetStridedSliceParameter({0, 1, 0, 1}, {1, 3, 2, 4}, {1, 1, 2, 2}); | |||
| TestMain({{{1, 3, 2, 4}, input_data, Tensor::Category::VAR}}, {{1, 2, 1, 2}, expect_data}, param, false); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -15,11 +15,17 @@ | |||
| */ | |||
| #include <string> | |||
| #include "src/common/log_adapter.h" | |||
| #include "mindspore/lite/src/common/file_utils.h" | |||
| #include "common/common_test.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||
| #include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" | |||
| using mindspore::kernel::LiteKernel; | |||
| using mindspore::kernel::SubGraphOpenCLKernel; | |||
| using mindspore::lite::KernelRegistry; | |||
| using mindspore::lite::Tensor; | |||
| using mindspore::schema::Format::Format_NHWC; | |||
| namespace mindspore { | |||
| void LoadTestData(void *dst, size_t dst_size, const std::string &file_path) { | |||
| @@ -35,4 +41,80 @@ void LoadTestData(void *dst, size_t dst_size, const std::string &file_path) { | |||
| } | |||
| } | |||
| void TestMain(const std::vector<std::tuple<std::vector<int>, float *, Tensor::Category>> &input_infos, | |||
| std::tuple<std::vector<int>, float *> output_info, OpParameter *op_parameter, bool fp16_enable, | |||
| float atol, bool print_output) { | |||
| MS_LOG(DEBUG) << "initialize OpenCLRuntime and OpenCLAllocator"; | |||
| auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); | |||
| auto ocl_runtime = runtime_wrapper.GetInstance(); | |||
| EXPECT_TRUE(ocl_runtime->Init() == RET_OK); | |||
| ocl_runtime->SetFp16Enable(fp16_enable); | |||
| auto allocator = ocl_runtime->GetAllocator(); | |||
| MS_LOG(DEBUG) << "create Tensors & init weight data"; | |||
| std::vector<Tensor> tensors; | |||
| std::vector<Tensor *> kernel_inputs; | |||
| std::vector<Tensor *> subgraph_inputs; | |||
| std::map<Tensor *, float *> subgraph_inputs_data; | |||
| for (auto input_info : input_infos) { | |||
| const std::vector<int> &shape = std::get<0>(input_info); | |||
| auto *input_data = std::get<1>(input_info); | |||
| const Tensor::Category category = std::get<2>(input_info); | |||
| tensors.emplace_back(kNumberTypeFloat32, shape, Format_NHWC, category); | |||
| auto *new_tensor = &tensors.back(); | |||
| kernel_inputs.push_back(new_tensor); | |||
| if (category != Tensor::Category::VAR) { | |||
| memcpy(new_tensor->MutableData(), input_data, new_tensor->Size()); | |||
| } else { | |||
| subgraph_inputs.push_back(new_tensor); | |||
| subgraph_inputs_data[new_tensor] = input_data; | |||
| } | |||
| } | |||
| const std::vector<int> &output_shape = std::get<0>(output_info); | |||
| float *expect_data = std::get<1>(output_info); | |||
| auto output = Tensor(kNumberTypeFloat32, output_shape, Format_NHWC, Tensor::Category::VAR); | |||
| MS_LOG(DEBUG) << "create OpenCL Kernel"; | |||
| auto primitive_type = static_cast<schema::PrimitiveType>(op_parameter->type_); | |||
| kernel::KernelKey key{kernel::kGPU, kernel_inputs.front()->data_type(), primitive_type}; | |||
| auto creator = KernelRegistry::GetInstance()->GetCreator(key); | |||
| if (creator == nullptr) { | |||
| std::cerr << "get kernel registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl; | |||
| free(op_parameter); | |||
| FAIL(); | |||
| } | |||
| auto *kernel = creator(kernel_inputs, {&output}, op_parameter, nullptr, key, nullptr); | |||
| if (kernel == nullptr) { | |||
| std::cerr << "call kernel registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl; | |||
| free(op_parameter); | |||
| FAIL(); | |||
| } | |||
| MS_LOG(DEBUG) << "create SubGraph & init input data"; | |||
| std::vector<LiteKernel *> kernels{kernel}; | |||
| auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel(subgraph_inputs, {&output}, kernels, kernels, kernels); | |||
| if (sub_graph == nullptr) { | |||
| std::cerr << "create SubGraphOpenCLKernel error" << std::endl; | |||
| delete kernel;  // don't leak the kernel on allocation failure | |||
| FAIL(); | |||
| } | |||
| for (auto input : subgraph_inputs) { | |||
| EXPECT_TRUE(input->MallocData(allocator) == RET_OK); | |||
| } | |||
| EXPECT_TRUE(sub_graph->Init() == RET_OK); | |||
| for (auto input : subgraph_inputs) { | |||
| memcpy(input->data_c(), subgraph_inputs_data[input], input->Size()); | |||
| } | |||
| MS_LOG(DEBUG) << "run SubGraph & compare result"; | |||
| EXPECT_TRUE(sub_graph->Run() == RET_OK); | |||
| if (print_output) { | |||
| for (int i = 0; i < output.ElementsNum(); ++i) { | |||
| printf("%d: expect=%.3f output=%.3f\n", i, expect_data[i], reinterpret_cast<float *>(output.data_c())[i]); | |||
| } | |||
| } | |||
| CommonTest::CompareOutputData(reinterpret_cast<float *>(output.data_c()), expect_data, output.ElementsNum(), atol); | |||
| MS_LOG(DEBUG) << "release resources"; | |||
| delete sub_graph; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -14,16 +14,18 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ | |||
| #define TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ | |||
| #include <string> | |||
| #include <iostream> | |||
| #include "tests/ut/cpp/common/common_test.h" | |||
| #include "src/common/log_adapter.h" | |||
| #include <vector> | |||
| #include <tuple> | |||
| #include <map> | |||
| #include "mindspore/lite/src/tensor.h" | |||
| #include "mindspore/lite/src/common/file_utils.h" | |||
| #include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" | |||
| #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | |||
| using mindspore::lite::Tensor; | |||
| namespace mindspore { | |||
| @@ -63,6 +65,10 @@ void CompareOutput(lite::Tensor *output_tensor, const std::string &file_path, T | |||
| CompareOutput(output_tensor->data_c(), expect_data, output_tensor->ElementsNum(), atol, rtol); | |||
| } | |||
| void TestMain(const std::vector<std::tuple<std::vector<int>, float *, Tensor::Category>> &input_infos, | |||
| std::tuple<std::vector<int>, float *> output_info, OpParameter *op_parameter, bool fp16_enable = false, | |||
| float atol = 1e-8f, bool print_output = false); | |||
| } // namespace mindspore | |||
| #endif // TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ | |||