diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/cast.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/cast.cl index 76965ad43e..f2d5dfda3d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/cast.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/cast.cl @@ -8,7 +8,7 @@ __kernel void Cast_fp32_to_fp16(__read_only image2d_t input, __write_only image2 if (x >= XY.x || y >= XY.y) { return; } - half4 result = convert_half4(READ_IMAGE(input, smp_none, (int2)(x, y))); + half4 result = convert_half4(read_imagef(input, smp_none, (int2)(x, y))); write_imageh(output, (int2)(x, y), result); } @@ -18,8 +18,8 @@ __kernel void Cast_fp32_to_fp32(__read_only image2d_t input, __write_only image2 if (x >= XY.x || y >= XY.y) { return; } - float4 result = READ_IMAGE(input, smp_none, (int2)(x, y)); - write_imageh(output, (int2)(x, y), result); + float4 result = read_imagef(input, smp_none, (int2)(x, y)); + write_imagef(output, (int2)(x, y), result); } __kernel void Cast_fp16_to_fp16(__read_only image2d_t input, __write_only image2d_t output, int2 XY) { @@ -28,7 +28,7 @@ __kernel void Cast_fp16_to_fp16(__read_only image2d_t input, __write_only image2 if (x >= XY.x || y >= XY.y) { return; } - half4 result = READ_IMAGE(input, smp_none, (int2)(x, y)); + half4 result = read_imageh(input, smp_none, (int2)(x, y)); write_imageh(output, (int2)(x, y), result); } @@ -38,6 +38,6 @@ __kernel void Cast_fp16_to_fp32(__read_only image2d_t input, __write_only image2 if (x >= XY.x || y >= XY.y) { return; } - float4 result = convert_float4(READ_IMAGE(input, smp_none, (int2)(x, y))); - write_imageh(output, (int2)(x, y), result); + float4 result = convert_float4(read_imageh(input, smp_none, (int2)(x, y))); + write_imagef(output, (int2)(x, y), result); } diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/sparse_to_dense.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/sparse_to_dense.cl index 67d4d4a6f4..07af10c86c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/sparse_to_dense.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/sparse_to_dense.cl @@ -10,16 +10,17 @@ __kernel void SparseToDenseScalar(__read_only image2d_t input, __global float *o return; } FLT4 index_input = READ_IMAGE(input, smp_zero, (int2)(Y, X)); + int4 index_input_int = *((int4 *)&index_input); int index = 0; if (inshapeindex1_dim == 1) { - index = ((int)index_input.x) * stride_w; + index = (index_input_int.x) * stride_w; } else if (inshapeindex1_dim == 2) { - index = ((int)index_input.x) * stride_w + ((int)index_input.y); + index = (index_input_int.x) * stride_w + (index_input_int.y); } else if (inshapeindex1_dim == 3) { - index = ((int)index_input.x) * stride_w + ((int)index_input.y) * outputshape.w * C4NUM + ((int)index_input.z); + index = (index_input_int.x) * stride_w + (index_input_int.y) * outputshape.w * C4NUM + (index_input_int.z); } else { - index = ((int)index_input.x) * outputshape.y * stride_w + ((int)index_input.y) * stride_w + - ((int)index_input.z) * outputshape.w * C4NUM + (int)index_input.w; + index = (index_input_int.x) * outputshape.y * stride_w + (index_input_int.y) * stride_w + + (index_input_int.z) * outputshape.w * C4NUM + index_input_int.w; } output[index] = weight; } @@ -33,16 +34,17 @@ __kernel void SparseToDenseVector(__read_only image2d_t input, __global float *o return; } FLT4 index_input = READ_IMAGE(input, smp_zero, (int2)(Y, X)); + int4 index_input_int = *((int4 *)&index_input); int index = 0; if (inshapeindex1_dim == 1) { - index = ((int)index_input.x) * stride_w; + index = (index_input_int.x) * stride_w; } else if (inshapeindex1_dim == 2) { - index = ((int)index_input.x) * stride_w + (int)index_input.y; + index = (index_input_int.x) * stride_w + index_input_int.y; } else if (inshapeindex1_dim == 3) { - index = ((int)index_input.x) * stride_w + ((int)index_input.y) * outputshape.w * C4NUM + (int)index_input.z; + index = (index_input_int.x) * stride_w + (index_input_int.y) * outputshape.w * C4NUM + index_input_int.z; } else { - index = ((int)index_input.x) * outputshape.y * stride_w + ((int)index_input.y) * stride_w + - ((int)index_input.z) * outputshape.w * C4NUM + (int)index_input.w; + index = (index_input_int.x) * outputshape.y * stride_w + (index_input_int.y) * stride_w + + (index_input_int.z) * outputshape.w * C4NUM + index_input_int.w; } output[index] = weight_vector[X]; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc index 8ae12f75c8..c85d0ad7ae 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc @@ -45,16 +45,18 @@ int CastOpenCLKernel::CheckSpecs() { auto input_dtype = in_tensors_.front()->data_type(); if (input_dtype != kNumberTypeFloat32 && input_dtype != kNumberTypeFloat16) { MS_LOG(ERROR) << "input dtype must be float32/float16"; + return RET_ERROR; } auto output_dtype = out_tensors_.front()->data_type(); if (output_dtype != kNumberTypeFloat32 && output_dtype != kNumberTypeFloat16) { MS_LOG(ERROR) << "output dtype must be float32/float16"; + return RET_ERROR; } return RET_OK; } void CastOpenCLKernel::SetConstArgs() { - cl_int4 shape = {static_cast(shape_.width), static_cast(shape_.height)}; + cl_int2 shape = {static_cast(shape_.width), static_cast(shape_.height)}; ocl_runtime_->SetKernelArg(kernel_, 2, shape); } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc index 97f45de54d..ad4e9a3999 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc @@ -108,10 +108,6 @@ int SparseToDenseOpenCLKernel::CheckSpecs() { return ERROR; } } - if (inshapeindex1_dim > 4) { - MS_LOG(ERROR) << "Unsupported input_indices[1] > 4: "; - return ERROR; - } auto param = reinterpret_cast(op_parameter_); if (param->validate_indices_) { MS_LOG(ERROR) << "Unsupported unordered for in_tensors_indices"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc index 426d76b101..dbb314689b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc @@ -59,15 +59,9 @@ int SplitOpenCLKernel::RunAxis0() { int SplitOpenCLKernel::CheckSpecs() { auto param = reinterpret_cast(this->op_parameter_); - if (param->split_dim_) { - if (out_tensors_.size() != 2 || in_tensors_.size() != 1) { - MS_LOG(ERROR) << "in size: " << in_tensors_.size() << ", out size: " << out_tensors_.size(); - return RET_ERROR; - } - if (param->num_split_ != 2) { - MS_LOG(ERROR) << "num_split_(should be 2): " << param->num_split_; - return RET_ERROR; - } + if ((out_tensors_.size() != 2 || (out_tensors_.size() != 3 && param->split_dim_ == 0)) && in_tensors_.size() != 1) { + MS_LOG(ERROR) << "in size: " << in_tensors_.size() << ", out size: " << out_tensors_.size(); + return RET_ERROR; } if (in_tensors_.at(0)->IsConst()) { MS_LOG(ERROR) << "in_tensors_ must be tensor"; @@ -79,6 +73,11 @@ int SplitOpenCLKernel::CheckSpecs() { return RET_ERROR; } } + + if (param->num_split_ != 2 && (param->num_split_ != 3 && param->split_dim_ == 0)) { + MS_LOG(ERROR) << "num_split_ only supported 2 or (3 && split_dim_ = 0) yet"; + return RET_ERROR; + } if (param->split_dim_ < 0 || param->split_dim_ > 3) { MS_LOG(ERROR) << "split_dim_ must between 0~3"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc index 7c1fda4397..d48551d831 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc @@ -67,8 +67,8 @@ void StackGetWorkGroup(const std::vector &global, std::vector *l int StackOpenCLKernel::CheckSpecs() { auto param = reinterpret_cast(this->op_parameter_); axis_ = param->axis_; - if (in_tensors_.size() != 2 && (axis_ != 0)) { - MS_LOG(ERROR) << " only support input size = 2 "; + if (in_tensors_.size() != 2 && out_tensors_.size() != 1) { + MS_LOG(ERROR) << " only support input size = 2 and output size = 1"; return RET_ERROR; } if (in_tensors_[0]->shape().size() > 4 || in_tensors_[0]->shape().size() <= 0) { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/split_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/split_tests.cc index 51ce3e0e1d..5de76a5a89 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/split_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/split_tests.cc @@ -54,4 +54,24 @@ TEST_F(TestOpenCL_Split, input2_axis3) { fp16_enable, fp16_enable ? 1e-3 : 1e-9); } } + +TEST_F(TestOpenCL_Split, input3_axis0) { + std::vector input_shape = {8, 1, 1, 1}; + std::vector output_shape1 = {2, 1, 1, 1}; + std::vector output_shape2 = {3, 1, 1, 1}; + std::vector output_shape3 = {3, 1, 1, 1}; + int split_dim_ = 0; + int num_split_ = 3; // len of split_sizes_ + std::vector split_sizes_{2, 3, 3}; + float input_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37}; + float output_data1[] = {0.75, 0.06}; + float output_data2[] = {0.74, 0.30, 0.9}; + float output_data3[] = {0.59, 0.03, 0.37}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(split_dim_, num_split_, split_sizes_); + TestMain({{input_shape, input_data, VAR}}, + {{output_shape1, output_data1}, {output_shape2, output_data2}, {output_shape3, output_data3}}, param, + fp16_enable, fp16_enable ? 1e-3 : 1e-9); + } +} } // namespace mindspore::lite::opencl::test