From b284dbd0f4f77efc2334c44d924bfefa2b96f376 Mon Sep 17 00:00:00 2001 From: nihui Date: Sat, 29 Mar 2025 10:16:25 +0800 Subject: [PATCH] discover VK_KHR_shader_non_semantic_info, checked convolution imagestore (#5955) --- .github/workflows/test-coverage.yml | 1 - src/gpu.cpp | 11 +++++++++++ src/gpu.h | 1 + src/layer/vulkan/shader/convolution.comp | 17 ++++++++++------- .../vulkan/shader/convolution_pack1to4.comp | 17 ++++++++++------- .../vulkan/shader/convolution_pack1to8.comp | 17 ++++++++++------- src/layer/vulkan/shader/convolution_pack4.comp | 17 ++++++++++------- .../vulkan/shader/convolution_pack4to1.comp | 17 ++++++++++------- .../vulkan/shader/convolution_pack4to8.comp | 17 ++++++++++------- src/layer/vulkan/shader/convolution_pack8.comp | 17 ++++++++++------- .../vulkan/shader/convolution_pack8to1.comp | 17 ++++++++++------- .../vulkan/shader/convolution_pack8to4.comp | 17 ++++++++++------- 12 files changed, 102 insertions(+), 64 deletions(-) diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml index f02b58985..b051034ba 100644 --- a/.github/workflows/test-coverage.yml +++ b/.github/workflows/test-coverage.yml @@ -252,7 +252,6 @@ jobs: cd build lcov -d ./src -c -o lcov.info lcov -r lcov.info '/usr/*' -o lcov.info - lcov -r lcov.info '*/install/*' -o lcov.info lcov -r lcov.info '*/build/*' -o lcov.info lcov --list lcov.info diff --git a/src/gpu.cpp b/src/gpu.cpp index 106458883..739e5a4d4 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -334,6 +334,7 @@ public: int support_VK_KHR_sampler_ycbcr_conversion; int support_VK_KHR_shader_float16_int8; int support_VK_KHR_shader_float_controls; + int support_VK_KHR_shader_non_semantic_info; int support_VK_KHR_shader_subgroup_extended_types; int support_VK_KHR_shader_subgroup_rotate; int support_VK_KHR_storage_buffer_storage_class; @@ -670,6 +671,7 @@ int GpuInfoPrivate::query_extensions() support_VK_KHR_sampler_ycbcr_conversion = 0; support_VK_KHR_shader_float16_int8 = 0; support_VK_KHR_shader_float_controls = 0; + support_VK_KHR_shader_non_semantic_info = 0; support_VK_KHR_shader_subgroup_extended_types = 0; support_VK_KHR_shader_subgroup_rotate = 0; support_VK_KHR_storage_buffer_storage_class = 0; @@ -733,6 +735,8 @@ int GpuInfoPrivate::query_extensions() support_VK_KHR_shader_float16_int8 = exp.specVersion; else if (strcmp(exp.extensionName, "VK_KHR_shader_float_controls") == 0) support_VK_KHR_shader_float_controls = exp.specVersion; + else if (strcmp(exp.extensionName, "VK_KHR_shader_non_semantic_info") == 0) + support_VK_KHR_shader_non_semantic_info = exp.specVersion; else if (strcmp(exp.extensionName, "VK_KHR_shader_subgroup_extended_types") == 0) support_VK_KHR_shader_subgroup_extended_types = exp.specVersion; else if (strcmp(exp.extensionName, "VK_KHR_shader_subgroup_rotate") == 0) @@ -1552,6 +1556,11 @@ int GpuInfo::support_VK_KHR_shader_float_controls() const return d->support_VK_KHR_shader_float_controls; } +int GpuInfo::support_VK_KHR_shader_non_semantic_info() const +{ + return d->support_VK_KHR_shader_non_semantic_info; +} + int GpuInfo::support_VK_KHR_shader_subgroup_extended_types() const { return d->support_VK_KHR_shader_subgroup_extended_types; @@ -2716,6 +2725,8 @@ VulkanDevice::VulkanDevice(int device_index) enabledExtensions.push_back("VK_KHR_shader_float16_int8"); if (info.support_VK_KHR_shader_float_controls()) enabledExtensions.push_back("VK_KHR_shader_float_controls"); + if (info.support_VK_KHR_shader_non_semantic_info()) + enabledExtensions.push_back("VK_KHR_shader_non_semantic_info"); if (info.support_VK_KHR_shader_subgroup_extended_types()) enabledExtensions.push_back("VK_KHR_shader_subgroup_extended_types"); if (info.support_VK_KHR_shader_subgroup_rotate()) diff --git a/src/gpu.h b/src/gpu.h index de6815002..24e696645 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -317,6 +317,7 @@ public: int support_VK_KHR_sampler_ycbcr_conversion() const; int support_VK_KHR_shader_float16_int8() const; int support_VK_KHR_shader_float_controls() const; + int support_VK_KHR_shader_non_semantic_info() const; int support_VK_KHR_shader_subgroup_extended_types() const; int support_VK_KHR_shader_subgroup_rotate() const; int support_VK_KHR_storage_buffer_storage_class() const; diff --git a/src/layer/vulkan/shader/convolution.comp b/src/layer/vulkan/shader/convolution.comp index ecf7b9d63..47745a66a 100644 --- a/src/layer/vulkan/shader/convolution.comp +++ b/src/layer/vulkan/shader/convolution.comp @@ -202,13 +202,16 @@ void main() #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); - image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); - image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); - image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); - image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); - image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); - image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); - image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); + if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); + if (gz2.y < psc(outc)) + { + image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); + if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); + if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + } #else const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/convolution_pack1to4.comp b/src/layer/vulkan/shader/convolution_pack1to4.comp index a73a56536..3031f8f7f 100644 --- a/src/layer/vulkan/shader/convolution_pack1to4.comp +++ b/src/layer/vulkan/shader/convolution_pack1to4.comp @@ -202,13 +202,16 @@ void main() #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); - image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); - image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); - image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); - image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); - image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); - image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); - image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); + if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); + if (gz2.y < psc(outc)) + { + image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); + if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); + if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + } #else const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/convolution_pack1to8.comp b/src/layer/vulkan/shader/convolution_pack1to8.comp index aad869f5f..a6821d263 100644 --- a/src/layer/vulkan/shader/convolution_pack1to8.comp +++ b/src/layer/vulkan/shader/convolution_pack1to8.comp @@ -220,13 +220,16 @@ void main() #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); - image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); - image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); - image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); - image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); - image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); - image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); - image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); + if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); + if (gz2.y < psc(outc)) + { + image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); + if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); + if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + } #else const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/convolution_pack4.comp b/src/layer/vulkan/shader/convolution_pack4.comp index 5357afd44..afa2278b2 100644 --- a/src/layer/vulkan/shader/convolution_pack4.comp +++ b/src/layer/vulkan/shader/convolution_pack4.comp @@ -233,13 +233,16 @@ void main() #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); - image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); - image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); - image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); - image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); - image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); - image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); - image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); + if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); + if (gz2.y < psc(outc)) + { + image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); + if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); + if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + } #else const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/convolution_pack4to1.comp b/src/layer/vulkan/shader/convolution_pack4to1.comp index 265affb14..b55d0b7e8 100644 --- a/src/layer/vulkan/shader/convolution_pack4to1.comp +++ b/src/layer/vulkan/shader/convolution_pack4to1.comp @@ -202,13 +202,16 @@ void main() #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); - image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); - image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); - image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); - image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); - image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); - image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); - image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); + if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); + if (gz2.y < psc(outc)) + { + image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); + if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); + if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + } #else const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/convolution_pack4to8.comp b/src/layer/vulkan/shader/convolution_pack4to8.comp index 6657fa7d6..636e29765 100644 --- a/src/layer/vulkan/shader/convolution_pack4to8.comp +++ b/src/layer/vulkan/shader/convolution_pack4to8.comp @@ -348,13 +348,16 @@ void main() #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); - image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); - image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); - image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); - image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); - image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); - image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); - image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); + if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); + if (gz2.y < psc(outc)) + { + image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); + if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); + if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + } #else const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/convolution_pack8.comp b/src/layer/vulkan/shader/convolution_pack8.comp index 0132d8507..300a46743 100644 --- a/src/layer/vulkan/shader/convolution_pack8.comp +++ b/src/layer/vulkan/shader/convolution_pack8.comp @@ -348,13 +348,16 @@ void main() #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); - image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); - image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); - image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); - image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); - image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); - image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); - image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); + if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); + if (gz2.y < psc(outc)) + { + image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); + if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); + if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + } #else const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/convolution_pack8to1.comp b/src/layer/vulkan/shader/convolution_pack8to1.comp index 579e5997a..b24f608ae 100644 --- a/src/layer/vulkan/shader/convolution_pack8to1.comp +++ b/src/layer/vulkan/shader/convolution_pack8to1.comp @@ -204,13 +204,16 @@ void main() #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); - image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); - image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); - image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); - image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); - image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); - image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); - image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); + if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); + if (gz2.y < psc(outc)) + { + image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); + if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); + if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + } #else const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/convolution_pack8to4.comp b/src/layer/vulkan/shader/convolution_pack8to4.comp index a6738682d..5fde4bb6e 100644 --- a/src/layer/vulkan/shader/convolution_pack8to4.comp +++ b/src/layer/vulkan/shader/convolution_pack8to4.comp @@ -264,13 +264,16 @@ void main() #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); - image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); - image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); - image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); - image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); - image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); - image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); - image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); + if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); + if (gz2.y < psc(outc)) + { + image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); + if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); + if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); + if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); + } #else const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;