| @@ -145,6 +145,8 @@ int OpenCLAllocator::GetImgDtypeSize(const ImageSize &img_size) { | |||||
| dtype_size = sizeof(cl_half); | dtype_size = sizeof(cl_half); | ||||
| } else if (img_size.dtype == CL_SIGNED_INT8) { | } else if (img_size.dtype == CL_SIGNED_INT8) { | ||||
| dtype_size = sizeof(cl_uchar); | dtype_size = sizeof(cl_uchar); | ||||
| } else if (img_size.dtype == CL_SIGNED_INT32) { | |||||
| dtype_size = sizeof(cl_int); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "Unsupported dtype " << img_size.dtype; | MS_LOG(ERROR) << "Unsupported dtype " << img_size.dtype; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -61,23 +61,35 @@ int OpenCLExecutor::RunOrTune(const std::vector<Tensor *> &inputs, const std::ve | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } | } | ||||
| if (is_tune) { | |||||
| ret = op_kernel->Tune(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "tuning kernel failed, name: " << kernel->name(); | |||||
| return ret; | |||||
| } | |||||
| } else { | |||||
| ret = kernel->Run(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name(); | |||||
| return ret; | |||||
| // Support ZeroShape | |||||
| size_t zero_shape_num = 0; | |||||
| for (auto tensor : kernel->out_tensors()) { | |||||
| for (size_t i = 0; i < tensor->shape().size(); i++) { | |||||
| if (tensor->shape()[i] == 0) { | |||||
| zero_shape_num++; | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| if (profiling_tmp) { | |||||
| auto execute_time = op_kernel->GetProfilingTimeMs(); | |||||
| MS_LOG(INFO) << "OpenCl kernel " << kernel->name() << "(" << kernel->type_str() | |||||
| << ") execute time is: " << op_kernel->GetProfilingTimeMs() << "ms"; | |||||
| callbackParam.execute_time = execute_time; | |||||
| } | |||||
| if (zero_shape_num != kernel->out_tensors().size()) { | |||||
| if (is_tune) { | |||||
| ret = op_kernel->Tune(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "tuning kernel failed, name: " << kernel->name(); | |||||
| return ret; | |||||
| } | |||||
| } else { | |||||
| ret = kernel->Run(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name(); | |||||
| return ret; | |||||
| } | |||||
| if (profiling_tmp) { | |||||
| auto execute_time = op_kernel->GetProfilingTimeMs(); | |||||
| MS_LOG(INFO) << "OpenCl kernel " << kernel->name() << "(" << kernel->type_str() | |||||
| << ") execute time is: " << op_kernel->GetProfilingTimeMs() << "ms"; | |||||
| callbackParam.execute_time = execute_time; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| ret = kernel->PostProcess(); | ret = kernel->PostProcess(); | ||||
| @@ -369,10 +369,9 @@ bool OpenCLRuntime::SetFp16Enable(bool enable) { | |||||
| } | } | ||||
| int OpenCLRuntime::BuildKernel(const cl::Kernel &kernel, const std::string &program_name, | int OpenCLRuntime::BuildKernel(const cl::Kernel &kernel, const std::string &program_name, | ||||
| const std::string &kernel_name, const std::vector<std::string> &build_options_ext, | |||||
| TypeId data_type) { | |||||
| const std::string &kernel_name, const std::vector<std::string> &build_options_ext) { | |||||
| std::string build_option = default_build_option_; | std::string build_option = default_build_option_; | ||||
| if (fp16_enable_ && data_type != kNumberTypeInt32) { | |||||
| if (fp16_enable_) { | |||||
| build_option += | build_option += | ||||
| " -DFP16_ENABLE=1 -DFLT=half -DFLT4=half4 -DFLT16=half16 -DAS_FLT4=as_half4 -DAS_UINT4=as_ushort4 -DUINT4=ushort4" | " -DFP16_ENABLE=1 -DFLT=half -DFLT4=half4 -DFLT16=half16 -DAS_FLT4=as_half4 -DAS_UINT4=as_ushort4 -DUINT4=ushort4" | ||||
| " -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh -DTO_FLT=convert_half -DTO_FLT4=convert_half4"; | " -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh -DTO_FLT=convert_half -DTO_FLT4=convert_half4"; | ||||
| @@ -120,7 +120,7 @@ class OpenCLRuntime { | |||||
| std::vector<unsigned char> GetProgramBinary(const cl::Program &program); | std::vector<unsigned char> GetProgramBinary(const cl::Program &program); | ||||
| bool LoadSource(const std::string &program_name, const std::string &source); | bool LoadSource(const std::string &program_name, const std::string &source); | ||||
| int BuildKernel(const cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name, | int BuildKernel(const cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name, | ||||
| const std::vector<std::string> &build_options_ext = {}, TypeId data_type = kNumberTypeFloat32); | |||||
| const std::vector<std::string> &build_options_ext = {}); | |||||
| int RunKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local, | int RunKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local, | ||||
| cl::CommandQueue *command_queue = nullptr, cl::Event *event = nullptr); | cl::CommandQueue *command_queue = nullptr, cl::Event *event = nullptr); | ||||
| int ReadOrWriteImage(void *buffer, void *data, bool is_read); | int ReadOrWriteImage(void *buffer, void *data, bool is_read); | ||||
| @@ -2,43 +2,47 @@ | |||||
| #define C4NUM 4 | #define C4NUM 4 | ||||
| __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; | __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; | ||||
| __kernel void gather(__write_only image2d_t dst_data, __read_only image2d_t src_data, __global int *indices, | |||||
| int4 src_size, int4 dst_size, int indices_num, int axis) { | |||||
| int X = get_global_id(0); // w | |||||
| int Y = get_global_id(1); // n*h | |||||
| int Z = get_global_id(2); // c | |||||
| if (X >= dst_size.x || Y >= dst_size.y * dst_size.w || Z >= dst_size.z || dst_size.y == 0) { | |||||
| return; | |||||
| #define GATHER(SUFFIX, READ_IMAGE, WRITE_IMAGE, TYPE) \ | |||||
| __kernel void gather_##SUFFIX(__write_only image2d_t dst_data, __read_only image2d_t src_data, \ | |||||
| __global int *indices, int4 src_size, int4 dst_size, int indices_num, int axis) { \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= dst_size.x || Y >= dst_size.y * dst_size.w || Z >= dst_size.z || dst_size.y == 0) { \ | |||||
| return; \ | |||||
| } \ | |||||
| TYPE##4 res_data = (TYPE##4)(0.0f, 0.0f, 0.0f, 0.0f); \ | |||||
| int batch = Y / dst_size.y; \ | |||||
| int height = Y % dst_size.y; \ | |||||
| if (axis == 0) { \ | |||||
| res_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + Z, indices[batch] * src_size.y + height)); \ | |||||
| } else if (axis == 1) { \ | |||||
| res_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + Z, batch * src_size.y + indices[height])); \ | |||||
| } else if (axis == 2) { \ | |||||
| res_data = READ_IMAGE(src_data, smp_zero, (int2)(indices[X] * src_size.z + Z, batch * src_size.y + height)); \ | |||||
| } else if (axis == 3) { \ | |||||
| int offset[4] = {indices[Z * 4] / 4, indices[Z * 4 + 1] / 4, indices[Z * 4 + 2] / 4, indices[Z * 4 + 3] / 4}; \ | |||||
| TYPE tmp[4]; \ | |||||
| TYPE res_tmp[4]; \ | |||||
| for (int i = 0; i < indices_num; ++i) { \ | |||||
| TYPE##4 rd_data = (TYPE##4)(0.0f, 0.0f, 0.0f, 0.0f); \ | |||||
| rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); \ | |||||
| if (i >= 1 && offset[i] != offset[i - 1]) { \ | |||||
| rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); \ | |||||
| } \ | |||||
| tmp[0] = rd_data.x; \ | |||||
| tmp[1] = rd_data.y; \ | |||||
| tmp[2] = rd_data.z; \ | |||||
| tmp[3] = rd_data.w; \ | |||||
| res_tmp[i] = tmp[indices[Z * 4 + i] % 4]; \ | |||||
| } \ | |||||
| res_data.x = res_tmp[0]; \ | |||||
| res_data.y = res_tmp[1]; \ | |||||
| res_data.z = res_tmp[2]; \ | |||||
| res_data.w = res_tmp[3]; \ | |||||
| } \ | |||||
| WRITE_IMAGE(dst_data, (int2)(X * dst_size.z + Z, batch * dst_size.y + height), res_data); \ | |||||
| } | } | ||||
| FLT4 res_data = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f); | |||||
| int batch = Y / dst_size.y; | |||||
| int height = Y % dst_size.y; | |||||
| if (axis == 0) { | |||||
| res_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + Z, indices[batch] * src_size.y + height)); | |||||
| } else if (axis == 1) { | |||||
| res_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + Z, batch * src_size.y + indices[height])); | |||||
| } else if (axis == 2) { | |||||
| res_data = READ_IMAGE(src_data, smp_zero, (int2)(indices[X] * src_size.z + Z, batch * src_size.y + height)); | |||||
| } else if (axis == 3) { | |||||
| int offset[4] = {indices[Z * 4] / 4, indices[Z * 4 + 1] / 4, indices[Z * 4 + 2] / 4, indices[Z * 4 + 3] / 4}; | |||||
| FLT tmp[4]; | |||||
| FLT res_tmp[4]; | |||||
| for (int i = 0; i < indices_num; ++i) { | |||||
| FLT4 rd_data = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f); | |||||
| rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); | |||||
| if (i >= 1 && offset[i] != offset[i - 1]) { | |||||
| rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); | |||||
| } | |||||
| tmp[0] = rd_data.x; | |||||
| tmp[1] = rd_data.y; | |||||
| tmp[2] = rd_data.z; | |||||
| tmp[3] = rd_data.w; | |||||
| res_tmp[i] = tmp[indices[Z * 4 + i] % 4]; | |||||
| } | |||||
| res_data.x = res_tmp[0]; | |||||
| res_data.y = res_tmp[1]; | |||||
| res_data.z = res_tmp[2]; | |||||
| res_data.w = res_tmp[3]; | |||||
| } | |||||
| WRITE_IMAGE(dst_data, (int2)(X * dst_size.z + Z, batch * dst_size.y + height), res_data); | |||||
| } | |||||
| // GATHER(SUFFIX, READ_IMAGE, WRITE_IMAGE, TYPE) | |||||
| GATHER(float, READ_IMAGE, WRITE_IMAGE, FLT); | |||||
| GATHER(int, read_imagei, write_imagei, int); | |||||
| @@ -4,9 +4,17 @@ | |||||
| #define C4NUM 4 | #define C4NUM 4 | ||||
| #define UP_DIV(x, y) (((x) + (y) - (1)) / (y)) | #define UP_DIV(x, y) (((x) + (y) - (1)) / (y)) | ||||
| #define SET_ON_OR_OFF_VALUE(RESULT, POSITION, INDICES, ON_VALUE, OFF_VALUE) \ | |||||
| if (POSITION == INDICES) { \ | |||||
| RESULT = (float)(ON_VALUE); \ | |||||
| } else { \ | |||||
| RESULT = (float)(OFF_VALUE); \ | |||||
| } | |||||
| __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; | __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; | ||||
| __kernel void OneHotAxis0(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape, | __kernel void OneHotAxis0(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape, | ||||
| int4 out_shape, int depth, float on_value, float off_value, int C) { | |||||
| int4 out_shape, int depth, float on_value, float off_value, int C, int support_neg_index) { | |||||
| int X = get_global_id(0); // C4 | int X = get_global_id(0); // C4 | ||||
| int Y = get_global_id(1); // W | int Y = get_global_id(1); // W | ||||
| int Z = get_global_id(2); // H * N | int Z = get_global_id(2); // H * N | ||||
| @@ -14,42 +22,31 @@ __kernel void OneHotAxis0(__read_only image2d_t src_data, __write_only image2d_t | |||||
| int N = Z / out_shape.y; | int N = Z / out_shape.y; | ||||
| int H = Z % out_shape.y; | int H = Z % out_shape.y; | ||||
| int in_index = (H * out_shape.z + Y) * out_shape.w + X; | int in_index = (H * out_shape.z + Y) * out_shape.w + X; | ||||
| FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x)); | |||||
| int4 indices = read_imagei(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x)); | |||||
| int *indices_int = (int *)&indices; | int *indices_int = (int *)&indices; | ||||
| FLT4 result = (FLT4)(0.f); | |||||
| if (4 * X < C) { | |||||
| if (indices_int[0] == N) { | |||||
| result.x = (FLT)(on_value); | |||||
| } else { | |||||
| result.x = (FLT)(off_value); | |||||
| for (int i = 0; i < C4NUM; i++) { | |||||
| if (support_neg_index != 0 && indices_int[i] < 0) { | |||||
| indices_int[i] += depth; | |||||
| } | } | ||||
| } | } | ||||
| float4 result = (float4)(0.f); | |||||
| if (4 * X < C) { | |||||
| SET_ON_OR_OFF_VALUE(result.x, N, indices_int[0], on_value, off_value); | |||||
| } | |||||
| if (4 * X + 1 < C) { | if (4 * X + 1 < C) { | ||||
| if (indices_int[1] == N) { | |||||
| result.y = (FLT)(on_value); | |||||
| } else { | |||||
| result.y = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.y, N, indices_int[1], on_value, off_value); | |||||
| } | } | ||||
| if (4 * X + 2 < C) { | if (4 * X + 2 < C) { | ||||
| if (indices_int[2] == N) { | |||||
| result.z = (FLT)(on_value); | |||||
| } else { | |||||
| result.z = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.z, N, indices_int[2], on_value, off_value); | |||||
| } | } | ||||
| if (4 * X + 3 < C) { | if (4 * X + 3 < C) { | ||||
| if (indices_int[3] == N) { | |||||
| result.w = (FLT)(on_value); | |||||
| } else { | |||||
| result.w = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.w, N, indices_int[3], on_value, off_value); | |||||
| } | } | ||||
| WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result); | |||||
| write_imagef(dst_data, (int2)(Y * out_shape.w + X, Z), result); | |||||
| } | } | ||||
| __kernel void OneHotAxis1(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape, | __kernel void OneHotAxis1(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape, | ||||
| int4 out_shape, int depth, float on_value, float off_value, int C) { | |||||
| int4 out_shape, int depth, float on_value, float off_value, int C, int support_neg_index) { | |||||
| int X = get_global_id(0); // C4 | int X = get_global_id(0); // C4 | ||||
| int Y = get_global_id(1); // W | int Y = get_global_id(1); // W | ||||
| int Z = get_global_id(2); // H * N | int Z = get_global_id(2); // H * N | ||||
| @@ -57,42 +54,31 @@ __kernel void OneHotAxis1(__read_only image2d_t src_data, __write_only image2d_t | |||||
| int N = Z / out_shape.y; | int N = Z / out_shape.y; | ||||
| int H = Z % out_shape.y; | int H = Z % out_shape.y; | ||||
| int in_index = (N * out_shape.z + Y) * out_shape.w + X; | int in_index = (N * out_shape.z + Y) * out_shape.w + X; | ||||
| FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x)); | |||||
| int4 indices = read_imagei(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x)); | |||||
| int *indices_int = (int *)&indices; | int *indices_int = (int *)&indices; | ||||
| FLT4 result = (FLT4)(0.f); | |||||
| if (4 * X < C) { | |||||
| if (indices_int[0] == H) { | |||||
| result.x = (FLT)(on_value); | |||||
| } else { | |||||
| result.x = (FLT)(off_value); | |||||
| for (int i = 0; i < C4NUM; i++) { | |||||
| if (support_neg_index != 0 && indices_int[i] < 0) { | |||||
| indices_int[i] += depth; | |||||
| } | } | ||||
| } | } | ||||
| float4 result = (float4)(0.f); | |||||
| if (4 * X < C) { | |||||
| SET_ON_OR_OFF_VALUE(result.x, H, indices_int[0], on_value, off_value); | |||||
| } | |||||
| if (4 * X + 1 < C) { | if (4 * X + 1 < C) { | ||||
| if (indices_int[1] == H) { | |||||
| result.y = (FLT)(on_value); | |||||
| } else { | |||||
| result.y = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.y, H, indices_int[1], on_value, off_value); | |||||
| } | } | ||||
| if (4 * X + 2 < C) { | if (4 * X + 2 < C) { | ||||
| if (indices_int[2] == H) { | |||||
| result.z = (FLT)(on_value); | |||||
| } else { | |||||
| result.z = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.z, H, indices_int[2], on_value, off_value); | |||||
| } | } | ||||
| if (4 * X + 3 < C) { | if (4 * X + 3 < C) { | ||||
| if (indices_int[3] == H) { | |||||
| result.w = (FLT)(on_value); | |||||
| } else { | |||||
| result.w = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.w, H, indices_int[3], on_value, off_value); | |||||
| } | } | ||||
| WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result); | |||||
| write_imagef(dst_data, (int2)(Y * out_shape.w + X, Z), result); | |||||
| } | } | ||||
| __kernel void OneHotAxis2(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape, | __kernel void OneHotAxis2(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape, | ||||
| int4 out_shape, int depth, float on_value, float off_value, int C) { | |||||
| int4 out_shape, int depth, float on_value, float off_value, int C, int support_neg_index) { | |||||
| int X = get_global_id(0); // C4 | int X = get_global_id(0); // C4 | ||||
| int Y = get_global_id(1); // W | int Y = get_global_id(1); // W | ||||
| int Z = get_global_id(2); // H * N | int Z = get_global_id(2); // H * N | ||||
| @@ -100,42 +86,31 @@ __kernel void OneHotAxis2(__read_only image2d_t src_data, __write_only image2d_t | |||||
| int N = Z / out_shape.y; | int N = Z / out_shape.y; | ||||
| int H = Z % out_shape.y; | int H = Z % out_shape.y; | ||||
| int in_index = (N * out_shape.y + H) * out_shape.w + X; | int in_index = (N * out_shape.y + H) * out_shape.w + X; | ||||
| FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x)); | |||||
| int4 indices = read_imagei(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x)); | |||||
| int *indices_int = (int *)&indices; | int *indices_int = (int *)&indices; | ||||
| FLT4 result = (FLT4)(0.f); | |||||
| if (4 * X < C) { | |||||
| if (indices_int[0] == Y) { | |||||
| result.x = (FLT)(on_value); | |||||
| } else { | |||||
| result.x = (FLT)(off_value); | |||||
| for (int i = 0; i < C4NUM; i++) { | |||||
| if (support_neg_index != 0 && indices_int[i] < 0) { | |||||
| indices_int[i] += depth; | |||||
| } | } | ||||
| } | } | ||||
| float4 result = (float4)(0.f); | |||||
| if (4 * X < C) { | |||||
| SET_ON_OR_OFF_VALUE(result.x, Y, indices_int[0], on_value, off_value); | |||||
| } | |||||
| if (4 * X + 1 < C) { | if (4 * X + 1 < C) { | ||||
| if (indices_int[1] == Y) { | |||||
| result.y = (FLT)(on_value); | |||||
| } else { | |||||
| result.y = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.y, Y, indices_int[1], on_value, off_value); | |||||
| } | } | ||||
| if (4 * X + 2 < C) { | if (4 * X + 2 < C) { | ||||
| if (indices_int[2] == Y) { | |||||
| result.z = (FLT)(on_value); | |||||
| } else { | |||||
| result.z = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.z, Y, indices_int[2], on_value, off_value); | |||||
| } | } | ||||
| if (4 * X + 3 < C) { | if (4 * X + 3 < C) { | ||||
| if (indices_int[3] == Y) { | |||||
| result.w = (FLT)(on_value); | |||||
| } else { | |||||
| result.w = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.w, Y, indices_int[3], on_value, off_value); | |||||
| } | } | ||||
| WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result); | |||||
| write_imagef(dst_data, (int2)(Y * out_shape.w + X, Z), result); | |||||
| } | } | ||||
| __kernel void OneHotAxis3(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape, | __kernel void OneHotAxis3(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape, | ||||
| int4 out_shape, int depth, float on_value, float off_value, int C) { | |||||
| int4 out_shape, int depth, float on_value, float off_value, int C, int support_neg_index) { | |||||
| int X = get_global_id(0); // C4 | int X = get_global_id(0); // C4 | ||||
| int Y = get_global_id(1); // W | int Y = get_global_id(1); // W | ||||
| int Z = get_global_id(2); // H * N | int Z = get_global_id(2); // H * N | ||||
| @@ -145,88 +120,54 @@ __kernel void OneHotAxis3(__read_only image2d_t src_data, __write_only image2d_t | |||||
| int ci4_size = UP_DIV(out_shape.z, C4NUM); | int ci4_size = UP_DIV(out_shape.z, C4NUM); | ||||
| int in_index_c4 = (N * out_shape.y + H) * ci4_size + Y / 4; | int in_index_c4 = (N * out_shape.y + H) * ci4_size + Y / 4; | ||||
| int in_index_c4_remainder = Y % 4; | int in_index_c4_remainder = Y % 4; | ||||
| FLT4 indices = | |||||
| READ_IMAGE(src_data, smp_zero, (int2)(in_index_c4 % in_image2d_shape.x, in_index_c4 / in_image2d_shape.x)); | |||||
| int4 indices = | |||||
| read_imagei(src_data, smp_zero, (int2)(in_index_c4 % in_image2d_shape.x, in_index_c4 / in_image2d_shape.x)); | |||||
| int *indices_int = (int *)&indices; | int *indices_int = (int *)&indices; | ||||
| int index_one = indices_int[in_index_c4_remainder]; | int index_one = indices_int[in_index_c4_remainder]; | ||||
| FLT4 result = (FLT4)(0.f); | |||||
| if (support_neg_index != 0 && index_one < 0) { | |||||
| index_one += depth; | |||||
| } | |||||
| float4 result = (float4)(0.f); | |||||
| if (4 * X < C) { | if (4 * X < C) { | ||||
| if (index_one == 4 * X) { | |||||
| result.x = (FLT)(on_value); | |||||
| } else { | |||||
| result.x = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.x, 4 * X, index_one, on_value, off_value); | |||||
| } | } | ||||
| if (4 * X + 1 < C) { | if (4 * X + 1 < C) { | ||||
| if (index_one == 4 * X + 1) { | |||||
| result.y = (FLT)(on_value); | |||||
| } else { | |||||
| result.y = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.y, 4 * X + 1, index_one, on_value, off_value); | |||||
| } | } | ||||
| if (4 * X + 2 < C) { | if (4 * X + 2 < C) { | ||||
| if (index_one == 4 * X + 2) { | |||||
| result.z = (FLT)(on_value); | |||||
| } else { | |||||
| result.z = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.z, 4 * X + 2, index_one, on_value, off_value); | |||||
| } | } | ||||
| if (4 * X + 3 < C) { | if (4 * X + 3 < C) { | ||||
| if (index_one == 4 * X + 3) { | |||||
| result.w = (FLT)(on_value); | |||||
| } else { | |||||
| result.w = (FLT)(off_value); | |||||
| } | |||||
| SET_ON_OR_OFF_VALUE(result.w, 4 * X + 3, index_one, on_value, off_value); | |||||
| } | } | ||||
| WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result); | |||||
| write_imagef(dst_data, (int2)(Y * out_shape.w + X, Z), result); | |||||
| } | } | ||||
| __kernel void OneHot2DAxis0(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape, | |||||
| int4 out_shape, int depth, float on_value, float off_value, int C) { | |||||
| __kernel void OneHot2DAxis3(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape, | |||||
| int4 out_shape, int depth, float on_value, float off_value, int C, int support_neg_index) { | |||||
| int X = get_global_id(0); // C4 | int X = get_global_id(0); // C4 | ||||
| int Y = get_global_id(1); // W | |||||
| int Z = get_global_id(2); // N | |||||
| int Y = get_global_id(1); // W (out_shape.w is 1, Y is always 0) | |||||
| int Z = get_global_id(2); // H * N (out_shape.h is 1, so N == Z) | |||||
| if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return; | if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return; | ||||
| FLT4 result = (FLT4)(0.f); | |||||
| int channel = 4 * X; | |||||
| if (channel < C) { | |||||
| FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(0, channel)); | |||||
| int index = ((int *)&indices)[0]; | |||||
| if (index == Z) { | |||||
| result.x = (FLT)(on_value); | |||||
| } else { | |||||
| result.x = (FLT)(off_value); | |||||
| } | |||||
| int in_index_c4_remainder = Z % 4; | |||||
| int4 indices = read_imagei(src_data, smp_zero, (int2)(Z / C4NUM, 0)); | |||||
| int *indices_int = (int *)&indices; | |||||
| int index_one = indices_int[in_index_c4_remainder]; | |||||
| if (support_neg_index != 0 && index_one < 0) { | |||||
| index_one += depth; | |||||
| } | } | ||||
| channel++; | |||||
| if (channel < C) { | |||||
| FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(0, channel)); | |||||
| int index = ((int *)&indices)[0]; | |||||
| if (index == Z) { | |||||
| result.y = (FLT)(on_value); | |||||
| } else { | |||||
| result.y = (FLT)(off_value); | |||||
| } | |||||
| float4 result = (float4)(0.f); | |||||
| if (4 * X < C) { | |||||
| SET_ON_OR_OFF_VALUE(result.x, 4 * X, index_one, on_value, off_value); | |||||
| } | } | ||||
| channel++; | |||||
| if (channel < C) { | |||||
| FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(0, channel)); | |||||
| int index = ((int *)&indices)[0]; | |||||
| if (index == Z) { | |||||
| result.z = (FLT)(on_value); | |||||
| } else { | |||||
| result.z = (FLT)(off_value); | |||||
| } | |||||
| if (4 * X + 1 < C) { | |||||
| SET_ON_OR_OFF_VALUE(result.y, 4 * X + 1, index_one, on_value, off_value); | |||||
| } | } | ||||
| channel++; | |||||
| if (channel < C) { | |||||
| FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(0, channel)); | |||||
| int index = ((int *)&indices)[0]; | |||||
| if (index == Z) { | |||||
| result.w = (FLT)(on_value); | |||||
| } else { | |||||
| result.w = (FLT)(off_value); | |||||
| } | |||||
| if (4 * X + 2 < C) { | |||||
| SET_ON_OR_OFF_VALUE(result.z, 4 * X + 2, index_one, on_value, off_value); | |||||
| } | |||||
| if (4 * X + 3 < C) { | |||||
| SET_ON_OR_OFF_VALUE(result.w, 4 * X + 3, index_one, on_value, off_value); | |||||
| } | } | ||||
| WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result); | |||||
| write_imagef(dst_data, (int2)(Y * out_shape.w + X, Z), result); | |||||
| } | } | ||||
| @@ -2,60 +2,53 @@ | |||||
| #define C4NUM 4 | #define C4NUM 4 | ||||
| #define UP_DIV(x, y) (((x) + (y) - (1)) / (y)) | #define UP_DIV(x, y) (((x) + (y) - (1)) / (y)) | ||||
| __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; | __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; | ||||
| __kernel void reshape_NHWC4(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 src_size, | |||||
| int4 dst_size) { | |||||
| int X = get_global_id(0); | |||||
| int Y = get_global_id(1); | |||||
| int CO4 = UP_DIV(dst_size.z, C4NUM); | |||||
| int CO4_rem = dst_size.z % C4NUM; | |||||
| if (X >= dst_size.x || Y > dst_size.y) { | |||||
| return; | |||||
| } | |||||
| int CI4 = UP_DIV(src_size.x, C4NUM); | |||||
| int CI4_rem = src_size.x % C4NUM; | |||||
| CI4_rem = (CI4_rem == 0) ? C4NUM : CI4_rem; | |||||
| int in_img_x = CI4 * src_size.y; | |||||
| FLT4 res = (FLT4)(0.0f); | |||||
| FLT tmp[4]; | |||||
| FLT res_tmp[4]; | |||||
| int gcnt = 0; | |||||
| if (CO4_rem == 0 && ((CI4_rem & 0x3) == 0)) { | |||||
| gcnt = X + dst_size.x * Y; | |||||
| res = READ_IMAGE(src_data, smp_zero, (int2)(gcnt % in_img_x, gcnt / in_img_x)); | |||||
| WRITE_IMAGE(dst_data, (int2)(X, Y), res); | |||||
| } else { | |||||
| int start = ((X / CO4 * dst_size.z + min(dst_size.z, (X % CO4) * C4NUM)) + dst_size.w * Y); | |||||
| gcnt = start / src_size.x * CI4 + (start % src_size.x) / C4NUM; | |||||
| start = start % src_size.x % C4NUM; | |||||
| for (int i = 0, n = 0, j = start; i < C4NUM; ++n, j = 0) { | |||||
| int X_src = (gcnt + n) % in_img_x; | |||||
| res = READ_IMAGE(src_data, smp_zero, (int2)(X_src, (gcnt + n) / in_img_x)); | |||||
| tmp[0] = res.x; | |||||
| tmp[1] = res.y; | |||||
| tmp[2] = res.z; | |||||
| tmp[3] = res.w; | |||||
| int k = (X_src % CI4) == (CI4 - 1) ? CI4_rem : C4NUM; | |||||
| for (; j < k && i < C4NUM; ++j, ++i) { | |||||
| res_tmp[i] = tmp[j]; | |||||
| } | |||||
| } | |||||
| res.x = res_tmp[0]; | |||||
| res.y = res_tmp[1]; | |||||
| res.z = res_tmp[2]; | |||||
| res.w = res_tmp[3]; | |||||
| WRITE_IMAGE(dst_data, (int2)(X, Y), res); | |||||
| } | |||||
| } | |||||
| __kernel void reshape_NC4HW4(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 src_size, | |||||
| int4 dst_size) { | |||||
| int X = get_global_id(0); | |||||
| int Y = get_global_id(1); | |||||
| if (X >= dst_size.x || Y > dst_size.y) { | |||||
| return; | |||||
| #define RESHAPE_NHWC4(SUFFIX, READ_IMAGE, WRITE_IMAGE, TYPE) \ | |||||
| __kernel void reshape_NHWC4_##SUFFIX(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 src_size, \ | |||||
| int4 dst_size) { \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int CO4 = UP_DIV(dst_size.z, C4NUM); \ | |||||
| int CO4_rem = dst_size.z % C4NUM; \ | |||||
| if (X >= dst_size.x || Y > dst_size.y) { \ | |||||
| return; \ | |||||
| } \ | |||||
| int CI4 = UP_DIV(src_size.x, C4NUM); \ | |||||
| int CI4_rem = src_size.x % C4NUM; \ | |||||
| CI4_rem = (CI4_rem == 0) ? C4NUM : CI4_rem; \ | |||||
| int in_img_x = CI4 * src_size.y; \ | |||||
| TYPE##4 res = (TYPE##4)(0.0f); \ | |||||
| TYPE tmp[4]; \ | |||||
| TYPE res_tmp[4]; \ | |||||
| int gcnt = 0; \ | |||||
| if (CO4_rem == 0 && ((CI4_rem & 0x3) == 0)) { \ | |||||
| gcnt = X + dst_size.x * Y; \ | |||||
| res = READ_IMAGE(src_data, smp_zero, (int2)(gcnt % in_img_x, gcnt / in_img_x)); \ | |||||
| WRITE_IMAGE(dst_data, (int2)(X, Y), res); \ | |||||
| } else { \ | |||||
| int start = ((X / CO4 * dst_size.z + min(dst_size.z, (X % CO4) * C4NUM)) + dst_size.w * Y); \ | |||||
| gcnt = start / src_size.x * CI4 + (start % src_size.x) / C4NUM; \ | |||||
| start = start % src_size.x % C4NUM; \ | |||||
| for (int i = 0, n = 0, j = start; i < C4NUM; ++n, j = 0) { \ | |||||
| int X_src = (gcnt + n) % in_img_x; \ | |||||
| res = READ_IMAGE(src_data, smp_zero, (int2)(X_src, (gcnt + n) / in_img_x)); \ | |||||
| tmp[0] = res.x; \ | |||||
| tmp[1] = res.y; \ | |||||
| tmp[2] = res.z; \ | |||||
| tmp[3] = res.w; \ | |||||
| int k = (X_src % CI4) == (CI4 - 1) ? CI4_rem : C4NUM; \ | |||||
| for (; j < k && i < C4NUM; ++j, ++i) { \ | |||||
| res_tmp[i] = tmp[j]; \ | |||||
| } \ | |||||
| } \ | |||||
| res.x = res_tmp[0]; \ | |||||
| res.y = res_tmp[1]; \ | |||||
| res.z = res_tmp[2]; \ | |||||
| res.w = res_tmp[3]; \ | |||||
| WRITE_IMAGE(dst_data, (int2)(X, Y), res); \ | |||||
| } \ | |||||
| } | } | ||||
| int CI4 = UP_DIV(src_size.x, C4NUM); | |||||
| int in_img_x = CI4 * src_size.y; | |||||
| int gcnt = X + dst_size.x * Y; | |||||
| WRITE_IMAGE(dst_data, (int2)(X, Y), READ_IMAGE(src_data, smp_zero, (int2)(gcnt % in_img_x, gcnt / in_img_x))); | |||||
| } | |||||
| // RESHAPE_NHWC4(SUFFIX, READ_IMAGE, WRITE_IMAGE, TYPE) | |||||
| RESHAPE_NHWC4(float, READ_IMAGE, WRITE_IMAGE, FLT); | |||||
| RESHAPE_NHWC4(int, read_imagei, write_imagei, int); | |||||
| @@ -1,82 +1,82 @@ | |||||
| #pragma OPENCL EXTENSION cl_khr_fp16 : enable | #pragma OPENCL EXTENSION cl_khr_fp16 : enable | ||||
| __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; | __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; | ||||
| #define BUF_to_IMG(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, WRITE_IMAGE_OUT) \ | |||||
| __kernel void BUF_to_IMG_##src_dtype##_##dst_dtype(__global SRC_TYPE##4 *src_data, __write_only image2d_t dst_data, \ | |||||
| int4 size, int4 shape) { \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= size.x || Y >= size.y || Z >= size.z) { \ | |||||
| return; \ | |||||
| } \ | |||||
| DST_TYPE##4 data = (DST_TYPE##4)(0.f); \ | |||||
| int offset = (X * shape.z + Y) * shape.w + Z * 4; \ | |||||
| __global SRC_TYPE *src_addr = (__global SRC_TYPE *)src_data; \ | |||||
| src_addr += offset; \ | |||||
| if ((Z + 1) * 4 <= shape.w) { \ | |||||
| data = convert_##DST_TYPE##4(((__global SRC_TYPE##4 *)src_addr)[0]); \ | |||||
| } else { \ | |||||
| if ((shape.w - Z * 4) >= 1) { \ | |||||
| data.x = (DST_TYPE)src_addr[0]; \ | |||||
| } \ | |||||
| if ((shape.w - Z * 4) >= 2) { \ | |||||
| data.y = (DST_TYPE)src_addr[1]; \ | |||||
| } \ | |||||
| if ((shape.w - Z * 4) >= 3) { \ | |||||
| data.z = (DST_TYPE)src_addr[2]; \ | |||||
| } \ | |||||
| } \ | |||||
| if (size.y * size.z <= MAX_IMAGE2D_WIDTH) \ | |||||
| WRITE_IMAGE_OUT(dst_data, (int2)(Y * size.z + Z, X), data); \ | |||||
| else \ | |||||
| WRITE_IMAGE_OUT(dst_data, (int2)(Z, X * size.y + Y), data); \ | |||||
| #define BUF_to_IMG(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, WRITE_IMAGE_OUT) \ | |||||
| __kernel void BUF_to_IMG_##src_dtype##_##dst_dtype(__global SRC_TYPE##4 * src_data, __write_only image2d_t dst_data, \ | |||||
| int4 size, int4 shape) { \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= size.x || Y >= size.y || Z >= size.z) { \ | |||||
| return; \ | |||||
| } \ | |||||
| DST_TYPE##4 data = (DST_TYPE##4)(0.f); \ | |||||
| int offset = (X * shape.z + Y) * shape.w + Z * 4; \ | |||||
| __global SRC_TYPE *src_addr = (__global SRC_TYPE *)src_data; \ | |||||
| src_addr += offset; \ | |||||
| if ((Z + 1) * 4 <= shape.w) { \ | |||||
| data = convert_##DST_TYPE##4(((__global SRC_TYPE##4 *)src_addr)[0]); \ | |||||
| } else { \ | |||||
| if ((shape.w - Z * 4) >= 1) { \ | |||||
| data.x = (DST_TYPE)src_addr[0]; \ | |||||
| } \ | |||||
| if ((shape.w - Z * 4) >= 2) { \ | |||||
| data.y = (DST_TYPE)src_addr[1]; \ | |||||
| } \ | |||||
| if ((shape.w - Z * 4) >= 3) { \ | |||||
| data.z = (DST_TYPE)src_addr[2]; \ | |||||
| } \ | |||||
| } \ | |||||
| if (size.y * size.z <= MAX_IMAGE2D_WIDTH) \ | |||||
| WRITE_IMAGE_OUT(dst_data, (int2)(Y * size.z + Z, X), data); \ | |||||
| else \ | |||||
| WRITE_IMAGE_OUT(dst_data, (int2)(Z, X * size.y + Y), data); \ | |||||
| } | } | ||||
| // BUF_to_IMG(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, WRITE_IMAGE_OUT) | // BUF_to_IMG(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, WRITE_IMAGE_OUT) | ||||
| BUF_to_IMG(float32, float32, float, float, write_imagef); | BUF_to_IMG(float32, float32, float, float, write_imagef); | ||||
| BUF_to_IMG(float32, float16, float, half, write_imageh); | BUF_to_IMG(float32, float16, float, half, write_imageh); | ||||
| BUF_to_IMG(float16, float16, half, half, write_imageh); | BUF_to_IMG(float16, float16, half, half, write_imageh); | ||||
| BUF_to_IMG(int32, int32, float, float, write_imagef); | |||||
| BUF_to_IMG(uint32, uint32, float, float, write_imagef); | |||||
| BUF_to_IMG(int32, int32, int, int, write_imagei); | |||||
| BUF_to_IMG(uint32, uint32, int, int, write_imagei); | |||||
| BUF_to_IMG(int8, int8, char, int, write_imagei); | BUF_to_IMG(int8, int8, char, int, write_imagei); | ||||
| #define IMG_to_BUF(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, READ_IMAGE_IN) \ | |||||
| #define IMG_to_BUF(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, READ_IMAGE_IN) \ | |||||
| __kernel void IMG_to_BUF_##src_dtype##_##dst_dtype(__read_only image2d_t src_data, __global DST_TYPE##4 * dst_data, \ | __kernel void IMG_to_BUF_##src_dtype##_##dst_dtype(__read_only image2d_t src_data, __global DST_TYPE##4 * dst_data, \ | ||||
| int4 size, int4 shape) { \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= size.x || Y >= size.y || Z >= size.z) { \ | |||||
| return; \ | |||||
| } \ | |||||
| int4 size, int4 shape) { \ | |||||
| int X = get_global_id(0); \ | |||||
| int Y = get_global_id(1); \ | |||||
| int Z = get_global_id(2); \ | |||||
| if (X >= size.x || Y >= size.y || Z >= size.z) { \ | |||||
| return; \ | |||||
| } \ | |||||
| DST_TYPE##4 data; \ | DST_TYPE##4 data; \ | ||||
| if (size.y * size.z <= MAX_IMAGE2D_WIDTH) \ | |||||
| if (size.y * size.z <= MAX_IMAGE2D_WIDTH) \ | |||||
| data = convert_##DST_TYPE##4(READ_IMAGE_IN(src_data, smp_zero, (int2)(Y * size.z + Z, X))); \ | data = convert_##DST_TYPE##4(READ_IMAGE_IN(src_data, smp_zero, (int2)(Y * size.z + Z, X))); \ | ||||
| else \ | |||||
| else \ | |||||
| data = convert_##DST_TYPE##4(READ_IMAGE_IN(src_data, smp_zero, (int2)(Z, X * size.y + Y))); \ | data = convert_##DST_TYPE##4(READ_IMAGE_IN(src_data, smp_zero, (int2)(Z, X * size.y + Y))); \ | ||||
| int offset = (X * shape.z + Y) * shape.w + Z * 4; \ | |||||
| __global DST_TYPE *dst_addr = (__global DST_TYPE *)dst_data; \ | |||||
| dst_addr += offset; \ | |||||
| if ((Z + 1) * 4 <= shape.w) { \ | |||||
| int offset = (X * shape.z + Y) * shape.w + Z * 4; \ | |||||
| __global DST_TYPE *dst_addr = (__global DST_TYPE *)dst_data; \ | |||||
| dst_addr += offset; \ | |||||
| if ((Z + 1) * 4 <= shape.w) { \ | |||||
| ((__global DST_TYPE##4 *)dst_addr)[0] = data; \ | ((__global DST_TYPE##4 *)dst_addr)[0] = data; \ | ||||
| } else { \ | |||||
| if (shape.w - Z * 4 >= 1) { \ | |||||
| dst_addr[0] = data.x; \ | |||||
| } \ | |||||
| if (shape.w - Z * 4 >= 2) { \ | |||||
| dst_addr[1] = data.y; \ | |||||
| } \ | |||||
| if (shape.w - Z * 4 >= 3) { \ | |||||
| dst_addr[2] = data.z; \ | |||||
| } \ | |||||
| } \ | |||||
| } else { \ | |||||
| if (shape.w - Z * 4 >= 1) { \ | |||||
| dst_addr[0] = data.x; \ | |||||
| } \ | |||||
| if (shape.w - Z * 4 >= 2) { \ | |||||
| dst_addr[1] = data.y; \ | |||||
| } \ | |||||
| if (shape.w - Z * 4 >= 3) { \ | |||||
| dst_addr[2] = data.z; \ | |||||
| } \ | |||||
| } \ | |||||
| } | } | ||||
| // IMG_to_BUF(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, READ_IMAGE_IN) | // IMG_to_BUF(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, READ_IMAGE_IN) | ||||
| IMG_to_BUF(float32, float32, float, float, read_imagef); | IMG_to_BUF(float32, float32, float, float, read_imagef); | ||||
| IMG_to_BUF(float16, float32, half, float, read_imageh); | IMG_to_BUF(float16, float32, half, float, read_imageh); | ||||
| IMG_to_BUF(float16, float16, half, half, read_imageh); | IMG_to_BUF(float16, float16, half, half, read_imageh); | ||||
| IMG_to_BUF(int32, int32, float, float, read_imagef); | |||||
| IMG_to_BUF(uint32, uint32, float, float, read_imagef); | |||||
| IMG_to_BUF(int32, int32, int, int, read_imagei); | |||||
| IMG_to_BUF(uint32, uint32, int, int, read_imagei); | |||||
| IMG_to_BUF(int8, int8, char, char, read_imagei); | IMG_to_BUF(int8, int8, char, char, read_imagei); | ||||
| @@ -207,7 +207,7 @@ int ConcatOpenCLKernel::Prepare() { | |||||
| std::string source = concat_source; | std::string source = concat_source; | ||||
| std::string program_name = "Concat"; | std::string program_name = "Concat"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {}, out_tensors_[0]->data_type()); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {}); | |||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| SetGlobalLocal(); | SetGlobalLocal(); | ||||
| @@ -32,8 +32,8 @@ using mindspore::schema::PrimitiveType_Gather; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int GatherOpenCLKernel::CheckSpecs() { | int GatherOpenCLKernel::CheckSpecs() { | ||||
| if (in_tensors_.size() != 2) { | |||||
| MS_LOG(ERROR) << "GatherOpenCLKernel only supports 2 input Tensor but get " << in_tensors_.size(); | |||||
| if (in_tensors_.size() != 3) { | |||||
| MS_LOG(ERROR) << "GatherOpenCLKernel only supports 3 input Tensor but get " << in_tensors_.size(); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (out_tensors_.size() != 1) { | if (out_tensors_.size() != 1) { | ||||
| @@ -67,6 +67,10 @@ int GatherOpenCLKernel::CheckSpecs() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| axis_ = *reinterpret_cast<int32_t *>(in_tensors_.at(2)->data_c()); | axis_ = *reinterpret_cast<int32_t *>(in_tensors_.at(2)->data_c()); | ||||
| if (in_tensors_.at(2)->data_c() == nullptr) { | |||||
| MS_LOG(ERROR) << "GatherOpenCLKernel need Axis."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (axis_ < 0) { | if (axis_ < 0) { | ||||
| axis_ += input_ndim; | axis_ += input_ndim; | ||||
| } | } | ||||
| @@ -102,6 +106,11 @@ void GatherOpenCLKernel::SetGlobalLocal() { | |||||
| int GatherOpenCLKernel::Prepare() { | int GatherOpenCLKernel::Prepare() { | ||||
| std::string kernel_name = "gather"; | std::string kernel_name = "gather"; | ||||
| if (desc_.data_type == kNumberTypeInt32) { | |||||
| kernel_name += "_int"; | |||||
| } else { | |||||
| kernel_name += "_float"; | |||||
| } | |||||
| if (in_tensors_.at(0)->shape().size() == 1 && axis_ == 0) { | if (in_tensors_.at(0)->shape().size() == 1 && axis_ == 0) { | ||||
| axis_ = 3; | axis_ = 3; | ||||
| } | } | ||||
| @@ -110,7 +119,7 @@ int GatherOpenCLKernel::Prepare() { | |||||
| #else | #else | ||||
| std::string program_name = "gather"; | std::string program_name = "gather"; | ||||
| ocl_runtime_->LoadSource(program_name, gather_source); | ocl_runtime_->LoadSource(program_name, gather_source); | ||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {}, out_tensors_[0]->data_type()); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {}); | |||||
| #endif | #endif | ||||
| if (in_tensors_.at(1)->IsConst()) { | if (in_tensors_.at(1)->IsConst()) { | ||||
| intensor1_is_tensor = false; | intensor1_is_tensor = false; | ||||
| @@ -130,7 +139,7 @@ int GatherOpenCLKernel::ConvertTensorToweight() { | |||||
| auto indices_tensor = in_tensors_.at(1); | auto indices_tensor = in_tensors_.at(1); | ||||
| auto indices_num = indices_tensor->ElementsNum(); | auto indices_num = indices_tensor->ElementsNum(); | ||||
| indices_data_ = | indices_data_ = | ||||
| reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num), lite::opencl::MemType::BUF); | |||||
| reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num, lite::opencl::MemType::BUF)); | |||||
| allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true); | allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true); | ||||
| if (indices_data_ == nullptr) { | if (indices_data_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Memory allocation failed"; | MS_LOG(ERROR) << "Memory allocation failed"; | ||||
| @@ -156,7 +165,7 @@ int GatherOpenCLKernel::InitWeights() { | |||||
| auto indices_num = indices_tensor->ElementsNum(); | auto indices_num = indices_tensor->ElementsNum(); | ||||
| auto allocator = ocl_runtime_->GetAllocator(); | auto allocator = ocl_runtime_->GetAllocator(); | ||||
| indices_data_ = | indices_data_ = | ||||
| reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num), lite::opencl::MemType::BUF); | |||||
| reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num, lite::opencl::MemType::BUF)); | |||||
| if (indices_data_ == nullptr) { | if (indices_data_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Memory allocation failed"; | MS_LOG(ERROR) << "Memory allocation failed"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -198,5 +207,6 @@ int GatherOpenCLKernel::Run() { | |||||
| REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Gather, OpenCLKernelCreator<GatherOpenCLKernel>); | REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Gather, OpenCLKernelCreator<GatherOpenCLKernel>); | ||||
| REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Gather, OpenCLKernelCreator<GatherOpenCLKernel>); | REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Gather, OpenCLKernelCreator<GatherOpenCLKernel>); | ||||
| REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Gather, OpenCLKernelCreator<GatherOpenCLKernel>); | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -39,12 +39,12 @@ int OneHotOpenCLKernel::CheckSpecs() { | |||||
| int OneHotOpenCLKernel::Prepare() { | int OneHotOpenCLKernel::Prepare() { | ||||
| std::string kernel_name = "OneHot"; | std::string kernel_name = "OneHot"; | ||||
| auto param = reinterpret_cast<OneHotParameter *>(op_parameter_); | |||||
| param_ = reinterpret_cast<OneHotParameter *>(op_parameter_); | |||||
| in_shape_ = GpuTensorInfo(in_tensors_[0]); | in_shape_ = GpuTensorInfo(in_tensors_[0]); | ||||
| out_shape_ = GpuTensorInfo(out_tensors_[0]); | out_shape_ = GpuTensorInfo(out_tensors_[0]); | ||||
| axis_ = out_shape_.AlignAxis(param->axis_); | |||||
| if (in_tensors_[0]->shape().size() == 1 && axis_ == 0) { | |||||
| kernel_name += "2DAxis0"; | |||||
| axis_ = out_shape_.AlignAxis(param_->axis_); | |||||
| if (in_tensors_[0]->shape().size() == 1 && axis_ == 3) { | |||||
| kernel_name += "2DAxis3"; | |||||
| } else { | } else { | ||||
| kernel_name += "Axis" + std::to_string(axis_); | kernel_name += "Axis" + std::to_string(axis_); | ||||
| } | } | ||||
| @@ -54,7 +54,7 @@ int OneHotOpenCLKernel::Prepare() { | |||||
| std::string source = one_hot_source; | std::string source = one_hot_source; | ||||
| std::string program_name = "OneHot"; | std::string program_name = "OneHot"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {}); | |||||
| #endif | #endif | ||||
| InitWeights(); | InitWeights(); | ||||
| SetConstArgs(); | SetConstArgs(); | ||||
| @@ -69,10 +69,12 @@ int OneHotOpenCLKernel::InitWeights() { | |||||
| if (in_tensors_.size() == 3) { // onnx | if (in_tensors_.size() == 3) { // onnx | ||||
| off_value_ = static_cast<float *>(in_tensors_[2]->data_c())[0]; | off_value_ = static_cast<float *>(in_tensors_[2]->data_c())[0]; | ||||
| on_value_ = static_cast<float *>(in_tensors_[2]->data_c())[1]; | on_value_ = static_cast<float *>(in_tensors_[2]->data_c())[1]; | ||||
| param_->support_neg_index_ = true; | |||||
| } | } | ||||
| if (in_tensors_.size() == 4) { // tf | if (in_tensors_.size() == 4) { // tf | ||||
| on_value_ = static_cast<float *>(in_tensors_[2]->data_c())[0]; | on_value_ = static_cast<float *>(in_tensors_[2]->data_c())[0]; | ||||
| off_value_ = static_cast<float *>(in_tensors_[3]->data_c())[0]; | off_value_ = static_cast<float *>(in_tensors_[3]->data_c())[0]; | ||||
| param_->support_neg_index_ = false; | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -87,7 +89,8 @@ void OneHotOpenCLKernel::SetConstArgs() { | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_); | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_); | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_); | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast<int>(out_shape_.C)); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<int>(out_shape_.C)); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast<int>(param_->support_neg_index_)); | |||||
| } | } | ||||
| void OneHotOpenCLKernel::SetGlobalLocal() { | void OneHotOpenCLKernel::SetGlobalLocal() { | ||||
| local_size_ = {}; | local_size_ = {}; | ||||
| @@ -103,6 +106,5 @@ int OneHotOpenCLKernel::Run() { | |||||
| return mindspore::lite::RET_OK; | return mindspore::lite::RET_OK; | ||||
| } | } | ||||
| REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_OneHot, OpenCLKernelCreator<OneHotOpenCLKernel>) | |||||
| REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_OneHot, OpenCLKernelCreator<OneHotOpenCLKernel>) | |||||
| REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_OneHot, OpenCLKernelCreator<OneHotOpenCLKernel>) | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -43,6 +43,7 @@ class OneHotOpenCLKernel : public OpenCLKernel { | |||||
| int axis_{0}; | int axis_{0}; | ||||
| GpuTensorInfo in_shape_; | GpuTensorInfo in_shape_; | ||||
| GpuTensorInfo out_shape_; | GpuTensorInfo out_shape_; | ||||
| OneHotParameter *param_; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -73,13 +73,18 @@ void ReshapeOpenCLKernel::SetGlobalLocal() { | |||||
| int ReshapeOpenCLKernel::Prepare() { | int ReshapeOpenCLKernel::Prepare() { | ||||
| std::string kernel_name = "reshape_NHWC4"; | std::string kernel_name = "reshape_NHWC4"; | ||||
| if (desc_.data_type == kNumberTypeInt32) { | |||||
| kernel_name += "_int"; | |||||
| } else { | |||||
| kernel_name += "_float"; | |||||
| } | |||||
| #ifdef PROGRAM_WITH_IL | #ifdef PROGRAM_WITH_IL | ||||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | ||||
| #else | #else | ||||
| std::string source = reshape_source; | std::string source = reshape_source; | ||||
| std::string program_name = "reshape"; | std::string program_name = "reshape"; | ||||
| ocl_runtime_->LoadSource(program_name, source); | ocl_runtime_->LoadSource(program_name, source); | ||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {}, out_tensors_[0]->data_type()); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {}); | |||||
| #endif | #endif | ||||
| SetGlobalLocal(); | SetGlobalLocal(); | ||||
| @@ -109,10 +114,14 @@ int ReshapeOpenCLKernel::PreProcess() { | |||||
| REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Reshape, OpenCLKernelCreator<ReshapeOpenCLKernel>) | REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Reshape, OpenCLKernelCreator<ReshapeOpenCLKernel>) | ||||
| REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Reshape, OpenCLKernelCreator<ReshapeOpenCLKernel>) | REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Reshape, OpenCLKernelCreator<ReshapeOpenCLKernel>) | ||||
| REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Reshape, OpenCLKernelCreator<ReshapeOpenCLKernel>) | |||||
| REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Squeeze, OpenCLKernelCreator<ReshapeOpenCLKernel>) | REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Squeeze, OpenCLKernelCreator<ReshapeOpenCLKernel>) | ||||
| REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Squeeze, OpenCLKernelCreator<ReshapeOpenCLKernel>) | REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Squeeze, OpenCLKernelCreator<ReshapeOpenCLKernel>) | ||||
| REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Squeeze, OpenCLKernelCreator<ReshapeOpenCLKernel>) | |||||
| REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Unsqueeze, OpenCLKernelCreator<ReshapeOpenCLKernel>) | REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Unsqueeze, OpenCLKernelCreator<ReshapeOpenCLKernel>) | ||||
| REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Unsqueeze, OpenCLKernelCreator<ReshapeOpenCLKernel>) | REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Unsqueeze, OpenCLKernelCreator<ReshapeOpenCLKernel>) | ||||
| REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Unsqueeze, OpenCLKernelCreator<ReshapeOpenCLKernel>) | |||||
| REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_ExpandDims, OpenCLKernelCreator<ReshapeOpenCLKernel>) | REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_ExpandDims, OpenCLKernelCreator<ReshapeOpenCLKernel>) | ||||
| REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_ExpandDims, OpenCLKernelCreator<ReshapeOpenCLKernel>) | REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_ExpandDims, OpenCLKernelCreator<ReshapeOpenCLKernel>) | ||||
| REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_ExpandDims, OpenCLKernelCreator<ReshapeOpenCLKernel>) | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -70,15 +70,15 @@ int OpenCLKernel::GetImageSize(size_t idx, lite::opencl::ImageSize *img_size) { | |||||
| auto img_info = GpuTensorInfo(out_tensors_[idx]); | auto img_info = GpuTensorInfo(out_tensors_[idx]); | ||||
| size_t img_dtype = CL_FLOAT; | size_t img_dtype = CL_FLOAT; | ||||
| switch (out_tensors_[idx]->data_type()) { | switch (out_tensors_[idx]->data_type()) { | ||||
| case kNumberTypeFloat32: | |||||
| case kNumberTypeInt32: | |||||
| case kNumberTypeUInt32: { | |||||
| case kNumberTypeFloat32: { | |||||
| img_dtype = CL_FLOAT; | img_dtype = CL_FLOAT; | ||||
| break; | break; | ||||
| } | } | ||||
| case kNumberTypeFloat16: | |||||
| case kNumberTypeInt16: | |||||
| case kNumberTypeUInt16: { | |||||
| case kNumberTypeInt32: { | |||||
| img_dtype = CL_SIGNED_INT32; | |||||
| break; | |||||
| } | |||||
| case kNumberTypeFloat16: { | |||||
| img_dtype = CL_HALF_FLOAT; | img_dtype = CL_HALF_FLOAT; | ||||
| break; | break; | ||||
| } | } | ||||
| @@ -244,7 +244,7 @@ kernel::LiteKernel *OpenCLKernelCreator(const std::vector<lite::Tensor *> &input | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| if (!opParameter->infer_flag_) { | if (!opParameter->infer_flag_) { | ||||
| MS_LOG(WARNING) << "kernel don't infer shape yet!"; | |||||
| MS_LOG(WARNING) << "kernel " << opParameter->name_ << "don't infer shape yet!"; | |||||
| return kernel; | return kernel; | ||||
| } | } | ||||
| auto ret = kernel->CheckSpecs(); | auto ret = kernel->CheckSpecs(); | ||||
| @@ -360,13 +360,10 @@ kernel::LiteKernel *Scheduler::FindGpuKernel(const std::vector<Tensor *> &in_ten | |||||
| if (context_->IsGpuEnabled()) { | if (context_->IsGpuEnabled()) { | ||||
| // support more data type like int32 | // support more data type like int32 | ||||
| kernel::KernelKey gpu_desc{kGPU, kNumberTypeFloat32, desc.type}; | |||||
| kernel::KernelKey gpu_desc{kGPU, desc.data_type, desc.type}; | |||||
| if (context_->IsGpuFloat16Enabled()) { | if (context_->IsGpuFloat16Enabled()) { | ||||
| gpu_desc.data_type = kNumberTypeFloat16; | gpu_desc.data_type = kNumberTypeFloat16; | ||||
| } | } | ||||
| if (in_tensors.front()->data_type() == kNumberTypeInt8) { | |||||
| gpu_desc.data_type = kNumberTypeInt8; | |||||
| } | |||||
| // weight dequant | // weight dequant | ||||
| auto ret = WeightDecoder::DequantNode(op_parameter, in_tensors, kNumberTypeFloat32); | auto ret = WeightDecoder::DequantNode(op_parameter, in_tensors, kNumberTypeFloat32); | ||||
| @@ -22,68 +22,75 @@ class TestOpenCL_Gather : public CommonTest {}; | |||||
| namespace { | namespace { | ||||
| // PrimitiveType_Gather: src/ops/populate/gather_populate.cc | // PrimitiveType_Gather: src/ops/populate/gather_populate.cc | ||||
| OpParameter *CreateParameter(int axis) { | |||||
| OpParameter *CreateParameter() { | |||||
| auto *param = test::CreateParameter<GatherParameter>(schema::PrimitiveType_Gather); | auto *param = test::CreateParameter<GatherParameter>(schema::PrimitiveType_Gather); | ||||
| param->axis_ = axis; | |||||
| return reinterpret_cast<OpParameter *>(param); | return reinterpret_cast<OpParameter *>(param); | ||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| TEST_F(TestOpenCL_Gather, Axis0) { | TEST_F(TestOpenCL_Gather, Axis0) { | ||||
| int axis = 0; | |||||
| std::vector<int> input_shape = {10}; | std::vector<int> input_shape = {10}; | ||||
| std::vector<int> indices_shape = {5}; | std::vector<int> indices_shape = {5}; | ||||
| std::vector<int> output_shape = {5}; | std::vector<int> output_shape = {5}; | ||||
| std::vector<int> axis_shape = {1}; | |||||
| int32_t axis_data[] = {0}; | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; | ||||
| int32_t indices[] = {1, 3, 5, 7, 9}; | int32_t indices[] = {1, 3, 5, 7, 9}; | ||||
| float output_data[] = {1, 3, 5, 7, 9}; | float output_data[] = {1, 3, 5, 7, 9}; | ||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | |||||
| TestMain( | |||||
| {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | |||||
| auto *param = CreateParameter(); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}, | |||||
| {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| TEST_F(TestOpenCL_Gather, Axis0ConstTensor) { | TEST_F(TestOpenCL_Gather, Axis0ConstTensor) { | ||||
| int axis = 0; | |||||
| std::vector<int> input_shape = {10}; | std::vector<int> input_shape = {10}; | ||||
| std::vector<int> indices_shape = {1}; | std::vector<int> indices_shape = {1}; | ||||
| std::vector<int> output_shape = {1}; | std::vector<int> output_shape = {1}; | ||||
| std::vector<int> axis_shape = {1}; | |||||
| int32_t axis_data[] = {0}; | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; | ||||
| int32_t indices[] = {1}; | int32_t indices[] = {1}; | ||||
| float output_data[] = {1}; | float output_data[] = {1}; | ||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | |||||
| TestMain( | |||||
| {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | |||||
| auto *param = CreateParameter(); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}, | |||||
| {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| TEST_F(TestOpenCL_Gather, Axis0_Tensor) { | TEST_F(TestOpenCL_Gather, Axis0_Tensor) { | ||||
| int axis = 0; | |||||
| std::vector<int> input_shape = {10}; | std::vector<int> input_shape = {10}; | ||||
| std::vector<int> indices_shape = {1}; | std::vector<int> indices_shape = {1}; | ||||
| std::vector<int> output_shape = {1}; | std::vector<int> output_shape = {1}; | ||||
| std::vector<int> axis_shape = {1}; | |||||
| int32_t axis_data[] = {0}; | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; | ||||
| int32_t indices[] = {1}; | int32_t indices[] = {1}; | ||||
| float output_data[] = {1}; | float output_data[] = {1}; | ||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}}, | |||||
| auto *param = CreateParameter(); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| {indices_shape, indices, VAR, kNumberTypeInt32}, | |||||
| {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | ||||
| } | } | ||||
| } | } | ||||
| TEST_F(TestOpenCL_Gather, Axis1) { | TEST_F(TestOpenCL_Gather, Axis1) { | ||||
| int axis = 1; | |||||
| std::vector<int> input_shape = {1, 5, 4, 4}; | std::vector<int> input_shape = {1, 5, 4, 4}; | ||||
| std::vector<int> indices_shape = {2}; | std::vector<int> indices_shape = {2}; | ||||
| std::vector<int> output_shape = {1, 2, 4, 4}; | std::vector<int> output_shape = {1, 2, 4, 4}; | ||||
| std::vector<int> axis_shape = {1}; | |||||
| int32_t axis_data[] = {1}; | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | ||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | ||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | ||||
| @@ -100,19 +107,21 @@ TEST_F(TestOpenCL_Gather, Axis1) { | |||||
| for (int i = 0; i < 1; ++i) { | for (int i = 0; i < 1; ++i) { | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| auto *param = CreateParameter(axis); | |||||
| auto *param = CreateParameter(); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | ||||
| {indices_shape, indices_datas[i], CONST_TENSOR, data_types[i]}}, | |||||
| {indices_shape, indices_datas[i], CONST_TENSOR, data_types[i]}, | |||||
| {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | {output_shape, output_data}, param, fp16_enable); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| TEST_F(TestOpenCL_Gather, Axis1_intensor1) { | TEST_F(TestOpenCL_Gather, Axis1_intensor1) { | ||||
| int axis = 1; | |||||
| std::vector<int> input_shape = {1, 5, 4, 4}; | std::vector<int> input_shape = {1, 5, 4, 4}; | ||||
| std::vector<int> indices_shape = {2}; | std::vector<int> indices_shape = {2}; | ||||
| std::vector<int> output_shape = {1, 2, 4, 4}; | std::vector<int> output_shape = {1, 2, 4, 4}; | ||||
| std::vector<int> axis_shape = {1}; | |||||
| int32_t axis_data[] = {1}; | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | ||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | ||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | ||||
| @@ -129,19 +138,21 @@ TEST_F(TestOpenCL_Gather, Axis1_intensor1) { | |||||
| for (int i = 0; i < 1; ++i) { | for (int i = 0; i < 1; ++i) { | ||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | |||||
| TestMain( | |||||
| {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices_datas[i], VAR, data_types[i]}}, | |||||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||||
| auto *param = CreateParameter(); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| {indices_shape, indices_datas[i], VAR, data_types[i]}, | |||||
| {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| TEST_F(TestOpenCL_Gather, Axis2) { | TEST_F(TestOpenCL_Gather, Axis2) { | ||||
| int axis = 2; | |||||
| std::vector<int> input_shape = {1, 5, 4, 4}; | std::vector<int> input_shape = {1, 5, 4, 4}; | ||||
| std::vector<int> indices_shape = {2}; | std::vector<int> indices_shape = {2}; | ||||
| std::vector<int> output_shape = {1, 5, 2, 4}; | std::vector<int> output_shape = {1, 5, 2, 4}; | ||||
| std::vector<int> axis_shape = {1}; | |||||
| int32_t axis_data[] = {2}; | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | ||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | ||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | ||||
| @@ -151,18 +162,20 @@ TEST_F(TestOpenCL_Gather, Axis2) { | |||||
| 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79}; | 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79}; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| auto *param = CreateParameter(axis); | |||||
| TestMain( | |||||
| {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | |||||
| auto *param = CreateParameter(); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}, | |||||
| {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| TEST_F(TestOpenCL_Gather, Axis2_intensor1) { | TEST_F(TestOpenCL_Gather, Axis2_intensor1) { | ||||
| int axis = 2; | |||||
| std::vector<int> input_shape = {1, 5, 4, 4}; | std::vector<int> input_shape = {1, 5, 4, 4}; | ||||
| std::vector<int> indices_shape = {2}; | std::vector<int> indices_shape = {2}; | ||||
| std::vector<int> output_shape = {1, 5, 2, 4}; | std::vector<int> output_shape = {1, 5, 2, 4}; | ||||
| std::vector<int> axis_shape = {1}; | |||||
| int32_t axis_data[] = {2}; | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | ||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | ||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | ||||
| @@ -172,17 +185,20 @@ TEST_F(TestOpenCL_Gather, Axis2_intensor1) { | |||||
| 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79}; | 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79}; | ||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}}, | |||||
| auto *param = CreateParameter(); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| {indices_shape, indices, VAR, kNumberTypeInt32}, | |||||
| {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | {output_shape, output_data}, param, fp16_enable); | ||||
| } | } | ||||
| } | } | ||||
| TEST_F(TestOpenCL_Gather, Axis3) { | TEST_F(TestOpenCL_Gather, Axis3) { | ||||
| int axis = 3; | |||||
| std::vector<int> input_shape = {1, 5, 4, 4}; | std::vector<int> input_shape = {1, 5, 4, 4}; | ||||
| std::vector<int> indices_shape = {2}; | std::vector<int> indices_shape = {2}; | ||||
| std::vector<int> output_shape = {1, 5, 4, 2}; | std::vector<int> output_shape = {1, 5, 4, 2}; | ||||
| std::vector<int> axis_shape = {1}; | |||||
| int32_t axis_data[] = {3}; | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | ||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | ||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | ||||
| @@ -192,18 +208,20 @@ TEST_F(TestOpenCL_Gather, Axis3) { | |||||
| 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79}; | 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79}; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| auto *param = CreateParameter(axis); | |||||
| TestMain( | |||||
| {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | |||||
| auto *param = CreateParameter(); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}, | |||||
| {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| TEST_F(TestOpenCL_Gather, Axis3_intensor1) { | TEST_F(TestOpenCL_Gather, Axis3_intensor1) { | ||||
| int axis = 3; | |||||
| std::vector<int> input_shape = {1, 5, 4, 4}; | std::vector<int> input_shape = {1, 5, 4, 4}; | ||||
| std::vector<int> indices_shape = {2}; | std::vector<int> indices_shape = {2}; | ||||
| std::vector<int> output_shape = {1, 5, 4, 2}; | std::vector<int> output_shape = {1, 5, 4, 2}; | ||||
| std::vector<int> axis_shape = {1}; | |||||
| int32_t axis_data[] = {3}; | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | ||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | ||||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | ||||
| @@ -213,8 +231,10 @@ TEST_F(TestOpenCL_Gather, Axis3_intensor1) { | |||||
| 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79}; | 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79}; | ||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}}, | |||||
| auto *param = CreateParameter(); | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| {indices_shape, indices, VAR, kNumberTypeInt32}, | |||||
| {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {output_shape, output_data}, param, fp16_enable); | {output_shape, output_data}, param, fp16_enable); | ||||
| } | } | ||||
| } | } | ||||
| @@ -42,7 +42,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis3Fp32) { | |||||
| float output_data[] = {-1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}; | float output_data[] = {-1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}; | ||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -63,7 +63,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis3T2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -86,7 +86,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis3T3Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -109,7 +109,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis3T4Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -130,7 +130,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -152,7 +152,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis2T2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -173,7 +173,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis2T3Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -196,7 +196,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis2T4Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -219,7 +219,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis1T1Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -240,7 +240,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis1T2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -263,7 +263,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis1T3Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -284,7 +284,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis0Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -307,7 +307,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis0T2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -332,7 +332,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis0T3Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -354,7 +354,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis0Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -377,7 +377,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis0T2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -399,7 +399,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis1Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -422,7 +422,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis1T2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -443,7 +443,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -466,7 +466,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis2T2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -487,7 +487,7 @@ TEST_F(TestOpenCL_OneHot, OneHot2DAxis0Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -508,7 +508,7 @@ TEST_F(TestOpenCL_OneHot, OneHot2DAxis0T2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -529,7 +529,7 @@ TEST_F(TestOpenCL_OneHot, OneHot2DAxis1Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -551,7 +551,7 @@ TEST_F(TestOpenCL_OneHot, OneHot2DAxis1T2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -572,7 +572,7 @@ TEST_F(TestOpenCL_OneHot, OneHot1DAxis0Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -593,7 +593,7 @@ TEST_F(TestOpenCL_OneHot, OneHot1DAxis0T2Fp32) { | |||||
| for (auto fp16_enable : {false}) { | for (auto fp16_enable : {false}) { | ||||
| auto *param = CreateParameter(axis); | auto *param = CreateParameter(axis); | ||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, | |||||
| TestMain({{input_shape, input_data, VAR, kNumberTypeInt32}, | |||||
| {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, | ||||
| {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, | ||||
| {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, | ||||
| @@ -34,7 +34,9 @@ TEST_F(TestOpenCL_Reshape, 4D_2D_test0) { | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6}; | float input_data[] = {0, 1, 2, 3, 4, 5, 6}; | ||||
| float output_data[] = {0, 1, 2, 3, 4, 5, 6}; | float output_data[] = {0, 1, 2, 3, 4, 5, 6}; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32}, | |||||
| {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| @@ -44,7 +46,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test0) { | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; | ||||
| float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; | float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32}, | |||||
| {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| @@ -54,7 +58,9 @@ TEST_F(TestOpenCL_Reshape, 4D_2D_test1) { | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; | ||||
| float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; | float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32}, | |||||
| {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| @@ -64,7 +70,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test1) { | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; | ||||
| float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; | float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32}, | |||||
| {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| @@ -74,7 +82,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test2) { | |||||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; | float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; | ||||
| float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; | float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32}, | |||||
| {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| @@ -90,7 +100,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test3) { | |||||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | ||||
| }; | }; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32}, | |||||
| {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| @@ -119,7 +131,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test4) { | |||||
| 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199}; | 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199}; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32}, | |||||
| {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| @@ -131,7 +145,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test5) { | |||||
| float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, | ||||
| 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29}; | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29}; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32}, | |||||
| {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| @@ -152,7 +168,9 @@ TEST_F(TestOpenCL_Reshape, 3D_2D_test6) { | |||||
| 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}; | 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}; | ||||
| for (auto fp16_enable : {false, true}) { | for (auto fp16_enable : {false, true}) { | ||||
| TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32}, | |||||
| {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}}, | |||||
| {shape_out, output_data}, CreateParameter(), fp16_enable); | |||||
| } | } | ||||
| } | } | ||||
| } // namespace mindspore::lite::opencl::test | } // namespace mindspore::lite::opencl::test | ||||