From fe7014c727966edad51f303c7c7a10152027a09c Mon Sep 17 00:00:00 2001 From: chenzupeng Date: Wed, 20 Jan 2021 10:52:22 +0800 Subject: [PATCH] add depthtospace --- .../kernel/opencl/cl/space_to_depth.cl | 54 ++++++ .../kernel/opencl/kernel/space_to_depth.cc | 21 ++- .../kernel/opencl/space_to_depth_tests.cc | 170 ++++++++++++++++++ 3 files changed, 240 insertions(+), 5 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/space_to_depth.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/space_to_depth.cl index f1aaf13a45..4fa711e2de 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/space_to_depth.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/space_to_depth.cl @@ -56,3 +56,57 @@ __kernel void SpaceToDepthAlign(__read_only image2d_t src_data, __write_only ima WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), READ_IMAGE(src_data, smp_zero, (int2)(wi * in_shape.w + ci, ni * in_shape.y + hi))); } + +__kernel void DepthToSpace(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 in_shape, + int4 out_shape, int block_size, int co_size) { + int X = get_global_id(0); // C4 + int Y = get_global_id(1); // W + int Z = get_global_id(2); // H * N + if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return; + if (out_shape.y == 0 || co_size == 0) return; + int N = Z / out_shape.y; + int H = Z % out_shape.y; + int co_base = X * C4NUM; + FLT result[C4NUM] = {0.f}; + for (int i = 0; i < C4NUM; i++) { + int co = co_base + i; + int bh = H % block_size; + int hi = H / block_size; + int bw = Y % block_size; + int wi = Y / block_size; + int ci = (bh * block_size + bw) * co_size + co; + int ci4 = ci / C4NUM; + int ci4_ramainder = ci % C4NUM; + FLT4 tmp = READ_IMAGE(src_data, smp_zero, (int2)(wi * in_shape.w + ci4, N * in_shape.y + hi)); + if (ci4_ramainder == 0) { + result[i] = tmp.x; + } else if (ci4_ramainder == 1) { + result[i] = tmp.y; + } else if (ci4_ramainder == 2) { + result[i] = tmp.z; + } else { + result[i] = tmp.w; + } + } + FLT4 result_flt4 = {result[0], result[1], result[2], result[3]}; + WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result_flt4); +} + +__kernel void DepthToSpaceAlign(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 in_shape, + int4 out_shape, int block_size, int co_size) { + int X = get_global_id(0); // C4 + int Y = get_global_id(1); // W + int Z = get_global_id(2); // H * N + if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return; + if (out_shape.y == 0 || block_size == 0) return; + int N = Z / out_shape.y; + int H = Z % out_shape.y; + int ni = N; + int bh = H % block_size; + int hi = H / block_size; + int bw = Y % block_size; + int wi = Y / block_size; + int ci = (bh * block_size + bw) * out_shape.w + X; + WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), + READ_IMAGE(src_data, smp_zero, (int2)(wi * in_shape.w + ci, ni * in_shape.y + hi))); +} diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc index 686664eb29..b301ae046f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc @@ -28,6 +28,7 @@ using mindspore::lite::RET_ERROR; using mindspore::lite::RET_NULL_PTR; using mindspore::lite::RET_OK; using mindspore::lite::RET_PARAM_INVALID; +using mindspore::schema::PrimitiveType_DepthToSpace; using mindspore::schema::PrimitiveType_SpaceToDepth; namespace mindspore::kernel { @@ -43,10 +44,13 @@ int SpaceToDepthOpenCLKernel::Prepare() { std::string kernel_name; in_shape_ = GpuTensorInfo(in_tensors_[0]); out_shape_ = GpuTensorInfo(out_tensors_[0]); - if (in_shape_.C % C4NUM != 0) { - kernel_name = "SpaceToDepth"; + if (Type() == PrimitiveType_DepthToSpace) { + kernel_name = "DepthToSpace"; } else { - kernel_name = "SpaceToDepthAlign"; + kernel_name = "SpaceToDepth"; + } + if (in_shape_.C % C4NUM == 0 && out_shape_.C % C4NUM == 0) { + kernel_name += "Align"; } #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); @@ -71,8 +75,13 @@ void SpaceToDepthOpenCLKernel::SetConstArgs() { ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_); - int ci_size = in_shape_.C; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size); + if (Type() == PrimitiveType_DepthToSpace) { + int co_size = out_shape_.C; + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size); + } else { + int ci_size = in_shape_.C; + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size); + } } void SpaceToDepthOpenCLKernel::SetGlobalLocal() { local_size_ = {}; @@ -91,4 +100,6 @@ int SpaceToDepthOpenCLKernel::Run() { REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_SpaceToDepth, OpenCLKernelCreator) REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_SpaceToDepth, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_DepthToSpace, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_DepthToSpace, OpenCLKernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_depth_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_depth_tests.cc index 836abfed04..81bbfad0e5 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_depth_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_depth_tests.cc @@ -15,10 +15,12 @@ */ #include "ut/src/runtime/kernel/opencl/common.h" #include "nnacl/fp32/space_to_depth_fp32.h" +#include "nnacl/depth_to_space_parameter.h" namespace mindspore::lite::opencl::test { class TestOpenCL_SpaceToDepth : public CommonTest {}; +class TestOpenCL_DepthToSpace : public CommonTest {}; namespace { // PrimitiveType_SpaceToDepth: src/ops/populate/space_to_depth_populate.cc @@ -27,6 +29,12 @@ OpParameter *CreateParameter(int block_size) { param->block_size_ = block_size; return reinterpret_cast(param); } + +OpParameter *CreateDepthToSpaceParameter(int block_size) { + auto *param = test::CreateParameter(schema::PrimitiveType_DepthToSpace); + param->block_size_ = block_size; + return reinterpret_cast(param); +} } // namespace TEST_F(TestOpenCL_SpaceToDepth, AlignTest1) { @@ -163,4 +171,166 @@ TEST_F(TestOpenCL_SpaceToDepth, NotAlignTest4) { } } +TEST_F(TestOpenCL_DepthToSpace, AlignTest1) { + int block_size = 2; + std::vector input_shape = {1, 2, 2, 16}; + std::vector output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size, + input_shape[3] / (block_size * block_size)}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, + 14, 15, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 48, 49, 50, 51, + 52, 53, 54, 55, 40, 41, 42, 43, 44, 45, 46, 47, 56, 57, 58, 59, 60, 61, 62, 63}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateDepthToSpaceParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } +} + +TEST_F(TestOpenCL_DepthToSpace, AlignTest2) { + int block_size = 2; + std::vector input_shape = {1, 1, 2, 16}; + std::vector output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size, + input_shape[3] / (block_size * block_size)}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, + 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateDepthToSpaceParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } +} + +TEST_F(TestOpenCL_DepthToSpace, AlignTest3) { + int block_size = 3; + std::vector input_shape = {1, 1, 2, 36}; + std::vector output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size, + input_shape[3] / (block_size * block_size)}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateDepthToSpaceParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } +} + +TEST_F(TestOpenCL_DepthToSpace, AlignTest4) { + int block_size = 4; + std::vector input_shape = {1, 1, 1, 64}; + std::vector output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size, + input_shape[3] / (block_size * block_size)}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateDepthToSpaceParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } +} + +TEST_F(TestOpenCL_DepthToSpace, NotAlignTest1) { + int block_size = 2; + std::vector input_shape = {1, 3, 3, 8}; + std::vector output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size, + input_shape[3] / (block_size * block_size)}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}; + float output_data[] = {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 4, 5, 6, 7, 12, 13, + 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 32, 33, 34, 35, 40, 41, 42, 43, + 28, 29, 30, 31, 36, 37, 38, 39, 44, 45, 46, 47, 48, 49, 50, 51, 56, 57, + 58, 59, 64, 65, 66, 67, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateDepthToSpaceParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } +} + +TEST_F(TestOpenCL_DepthToSpace, NotAlignTest2) { + int block_size = 3; + std::vector input_shape = {1, 3, 3, 9}; + std::vector output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size, + input_shape[3] / (block_size * block_size)}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80}; + float output_data[] = {0, 1, 2, 9, 10, 11, 18, 19, 20, 3, 4, 5, 12, 13, 14, 21, 22, 23, 6, 7, 8, + 15, 16, 17, 24, 25, 26, 27, 28, 29, 36, 37, 38, 45, 46, 47, 30, 31, 32, 39, 40, 41, + 48, 49, 50, 33, 34, 35, 42, 43, 44, 51, 52, 53, 54, 55, 56, 63, 64, 65, 72, 73, 74, + 57, 58, 59, 66, 67, 68, 75, 76, 77, 60, 61, 62, 69, 70, 71, 78, 79, 80}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateDepthToSpaceParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } +} + +TEST_F(TestOpenCL_DepthToSpace, NotAlignTest3) { + int block_size = 4; + std::vector input_shape = {1, 3, 2, 32}; + std::vector output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size, + input_shape[3] / (block_size * block_size)}; + float input_data[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191}; + float output_data[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 32, 33, 34, 35, 36, 37, 38, 39, 8, 9, 10, 11, 12, 13, + 14, 15, 40, 41, 42, 43, 44, 45, 46, 47, 16, 17, 18, 19, 20, 21, 22, 23, 48, 49, 50, 51, + 52, 53, 54, 55, 24, 25, 26, 27, 28, 29, 30, 31, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 96, 97, 98, 99, 100, 101, 102, 103, 72, 73, 74, 75, 76, 77, 78, 79, + 104, 105, 106, 107, 108, 109, 110, 111, 80, 81, 82, 83, 84, 85, 86, 87, 112, 113, 114, 115, 116, 117, + 118, 119, 88, 89, 90, 91, 92, 93, 94, 95, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 160, 161, 162, 163, 164, 165, 166, 167, 136, 137, 138, 139, 140, 141, 142, 143, 168, 169, + 170, 171, 172, 173, 174, 175, 144, 145, 146, 147, 148, 149, 150, 151, 176, 177, 178, 179, 180, 181, 182, 183, + 152, 153, 154, 155, 156, 157, 158, 159, 184, 185, 186, 187, 188, 189, 190, 191}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateDepthToSpaceParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } +} + +TEST_F(TestOpenCL_DepthToSpace, NotAlignTest4) { + int block_size = 2; + std::vector input_shape = {1, 3, 4, 8}; + std::vector output_shape = {1, input_shape[1] * block_size, input_shape[2] * block_size, + input_shape[3] / (block_size * block_size)}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95}; + float output_data[] = {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27, 4, 5, 6, 7, + 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, 32, 33, 34, 35, 40, 41, 42, 43, + 48, 49, 50, 51, 56, 57, 58, 59, 36, 37, 38, 39, 44, 45, 46, 47, 52, 53, 54, 55, + 60, 61, 62, 63, 64, 65, 66, 67, 72, 73, 74, 75, 80, 81, 82, 83, 88, 89, 90, 91, + 68, 69, 70, 71, 76, 77, 78, 79, 84, 85, 86, 87, 92, 93, 94, 95}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateDepthToSpaceParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } +} } // namespace mindspore::lite::opencl::test