Browse Source

discover VK_KHR_shader_non_semantic_info, checked convolution imagestore (#5955)

tags/20250428
nihui GitHub 1 year ago
parent
commit
b284dbd0f4
No known key found for this signature in database. GPG Key ID: B5690EEEBB952194
12 changed files with 102 additions and 64 deletions
  1. +0
    -1
      .github/workflows/test-coverage.yml
  2. +11
    -0
      src/gpu.cpp
  3. +1
    -0
      src/gpu.h
  4. +10
    -7
      src/layer/vulkan/shader/convolution.comp
  5. +10
    -7
      src/layer/vulkan/shader/convolution_pack1to4.comp
  6. +10
    -7
      src/layer/vulkan/shader/convolution_pack1to8.comp
  7. +10
    -7
      src/layer/vulkan/shader/convolution_pack4.comp
  8. +10
    -7
      src/layer/vulkan/shader/convolution_pack4to1.comp
  9. +10
    -7
      src/layer/vulkan/shader/convolution_pack4to8.comp
  10. +10
    -7
      src/layer/vulkan/shader/convolution_pack8.comp
  11. +10
    -7
      src/layer/vulkan/shader/convolution_pack8to1.comp
  12. +10
    -7
      src/layer/vulkan/shader/convolution_pack8to4.comp

+ 0
- 1
.github/workflows/test-coverage.yml View File

@@ -252,7 +252,6 @@ jobs:
cd build
lcov -d ./src -c -o lcov.info
lcov -r lcov.info '/usr/*' -o lcov.info
lcov -r lcov.info '*/install/*' -o lcov.info
lcov -r lcov.info '*/build/*' -o lcov.info
lcov --list lcov.info



+ 11
- 0
src/gpu.cpp View File

@@ -334,6 +334,7 @@ public:
int support_VK_KHR_sampler_ycbcr_conversion;
int support_VK_KHR_shader_float16_int8;
int support_VK_KHR_shader_float_controls;
+ int support_VK_KHR_shader_non_semantic_info;
int support_VK_KHR_shader_subgroup_extended_types;
int support_VK_KHR_shader_subgroup_rotate;
int support_VK_KHR_storage_buffer_storage_class;
@@ -670,6 +671,7 @@ int GpuInfoPrivate::query_extensions()
support_VK_KHR_sampler_ycbcr_conversion = 0;
support_VK_KHR_shader_float16_int8 = 0;
support_VK_KHR_shader_float_controls = 0;
+ support_VK_KHR_shader_non_semantic_info = 0;
support_VK_KHR_shader_subgroup_extended_types = 0;
support_VK_KHR_shader_subgroup_rotate = 0;
support_VK_KHR_storage_buffer_storage_class = 0;
@@ -733,6 +735,8 @@ int GpuInfoPrivate::query_extensions()
support_VK_KHR_shader_float16_int8 = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_KHR_shader_float_controls") == 0)
support_VK_KHR_shader_float_controls = exp.specVersion;
+ else if (strcmp(exp.extensionName, "VK_KHR_shader_non_semantic_info") == 0)
+ support_VK_KHR_shader_non_semantic_info = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_KHR_shader_subgroup_extended_types") == 0)
support_VK_KHR_shader_subgroup_extended_types = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_KHR_shader_subgroup_rotate") == 0)
@@ -1552,6 +1556,11 @@ int GpuInfo::support_VK_KHR_shader_float_controls() const
return d->support_VK_KHR_shader_float_controls;
}

+ int GpuInfo::support_VK_KHR_shader_non_semantic_info() const
+ {
+ return d->support_VK_KHR_shader_non_semantic_info;
+ }
+
int GpuInfo::support_VK_KHR_shader_subgroup_extended_types() const
{
return d->support_VK_KHR_shader_subgroup_extended_types;
@@ -2716,6 +2725,8 @@ VulkanDevice::VulkanDevice(int device_index)
enabledExtensions.push_back("VK_KHR_shader_float16_int8");
if (info.support_VK_KHR_shader_float_controls())
enabledExtensions.push_back("VK_KHR_shader_float_controls");
+ if (info.support_VK_KHR_shader_non_semantic_info())
+ enabledExtensions.push_back("VK_KHR_shader_non_semantic_info");
if (info.support_VK_KHR_shader_subgroup_extended_types())
enabledExtensions.push_back("VK_KHR_shader_subgroup_extended_types");
if (info.support_VK_KHR_shader_subgroup_rotate())


+ 1
- 0
src/gpu.h View File

@@ -317,6 +317,7 @@ public:
int support_VK_KHR_sampler_ycbcr_conversion() const;
int support_VK_KHR_shader_float16_int8() const;
int support_VK_KHR_shader_float_controls() const;
+ int support_VK_KHR_shader_non_semantic_info() const;
int support_VK_KHR_shader_subgroup_extended_types() const;
int support_VK_KHR_shader_subgroup_rotate() const;
int support_VK_KHR_storage_buffer_storage_class() const;


+ 10
- 7
src/layer/vulkan/shader/convolution.comp View File

@@ -202,13 +202,16 @@ void main()

#if NCNN_image_shader
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
- image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
- image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
- image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
+ if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
+ if (gz2.y < psc(outc))
+ {
+ image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
+ if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
+ if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ }
#else
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;



+ 10
- 7
src/layer/vulkan/shader/convolution_pack1to4.comp View File

@@ -202,13 +202,16 @@ void main()

#if NCNN_image_shader
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
- image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
- image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
- image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
+ if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
+ if (gz2.y < psc(outc))
+ {
+ image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
+ if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
+ if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ }
#else
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;



+ 10
- 7
src/layer/vulkan/shader/convolution_pack1to8.comp View File

@@ -220,13 +220,16 @@ void main()

#if NCNN_image_shader
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
- image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
- image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
- image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
+ if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
+ if (gz2.y < psc(outc))
+ {
+ image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
+ if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
+ if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ }
#else
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;



+ 10
- 7
src/layer/vulkan/shader/convolution_pack4.comp View File

@@ -233,13 +233,16 @@ void main()

#if NCNN_image_shader
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
- image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
- image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
- image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
+ if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
+ if (gz2.y < psc(outc))
+ {
+ image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
+ if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
+ if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ }
#else
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;



+ 10
- 7
src/layer/vulkan/shader/convolution_pack4to1.comp View File

@@ -202,13 +202,16 @@ void main()

#if NCNN_image_shader
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
- image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
- image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
- image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
+ if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
+ if (gz2.y < psc(outc))
+ {
+ image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
+ if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
+ if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ }
#else
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;



+ 10
- 7
src/layer/vulkan/shader/convolution_pack4to8.comp View File

@@ -348,13 +348,16 @@ void main()

#if NCNN_image_shader
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
- image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
- image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
- image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
+ if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
+ if (gz2.y < psc(outc))
+ {
+ image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
+ if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
+ if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ }
#else
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;



+ 10
- 7
src/layer/vulkan/shader/convolution_pack8.comp View File

@@ -348,13 +348,16 @@ void main()

#if NCNN_image_shader
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
- image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
- image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
- image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
- image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
+ if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
+ if (gz2.y < psc(outc))
+ {
+ image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
+ if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
+ if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ }
#else
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;



+ 10
- 7
src/layer/vulkan/shader/convolution_pack8to1.comp View File

@@ -204,13 +204,16 @@ void main()

#if NCNN_image_shader
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
- image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
- image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
- image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
- image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
+ if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
+ if (gz2.y < psc(outc))
+ {
+ image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
+ if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
+ if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ }
#else
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;



+ 10
- 7
src/layer/vulkan/shader/convolution_pack8to4.comp View File

@@ -264,13 +264,16 @@ void main()

#if NCNN_image_shader
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
- image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
- image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
- image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
- image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
+ if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
+ if (gz2.y < psc(outc))
+ {
+ image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
+ if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
+ if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
+ if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
+ }
#else
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;



Loading…
Cancel
Save