| @@ -252,7 +252,6 @@ jobs: | |||
| cd build | |||
| lcov -d ./src -c -o lcov.info | |||
| lcov -r lcov.info '/usr/*' -o lcov.info | |||
| lcov -r lcov.info '*/install/*' -o lcov.info | |||
| lcov -r lcov.info '*/build/*' -o lcov.info | |||
| lcov --list lcov.info | |||
| @@ -334,6 +334,7 @@ public: | |||
| int support_VK_KHR_sampler_ycbcr_conversion; | |||
| int support_VK_KHR_shader_float16_int8; | |||
| int support_VK_KHR_shader_float_controls; | |||
| int support_VK_KHR_shader_non_semantic_info; | |||
| int support_VK_KHR_shader_subgroup_extended_types; | |||
| int support_VK_KHR_shader_subgroup_rotate; | |||
| int support_VK_KHR_storage_buffer_storage_class; | |||
| @@ -670,6 +671,7 @@ int GpuInfoPrivate::query_extensions() | |||
| support_VK_KHR_sampler_ycbcr_conversion = 0; | |||
| support_VK_KHR_shader_float16_int8 = 0; | |||
| support_VK_KHR_shader_float_controls = 0; | |||
| support_VK_KHR_shader_non_semantic_info = 0; | |||
| support_VK_KHR_shader_subgroup_extended_types = 0; | |||
| support_VK_KHR_shader_subgroup_rotate = 0; | |||
| support_VK_KHR_storage_buffer_storage_class = 0; | |||
| @@ -733,6 +735,8 @@ int GpuInfoPrivate::query_extensions() | |||
| support_VK_KHR_shader_float16_int8 = exp.specVersion; | |||
| else if (strcmp(exp.extensionName, "VK_KHR_shader_float_controls") == 0) | |||
| support_VK_KHR_shader_float_controls = exp.specVersion; | |||
| else if (strcmp(exp.extensionName, "VK_KHR_shader_non_semantic_info") == 0) | |||
| support_VK_KHR_shader_non_semantic_info = exp.specVersion; | |||
| else if (strcmp(exp.extensionName, "VK_KHR_shader_subgroup_extended_types") == 0) | |||
| support_VK_KHR_shader_subgroup_extended_types = exp.specVersion; | |||
| else if (strcmp(exp.extensionName, "VK_KHR_shader_subgroup_rotate") == 0) | |||
| @@ -1552,6 +1556,11 @@ int GpuInfo::support_VK_KHR_shader_float_controls() const | |||
| return d->support_VK_KHR_shader_float_controls; | |||
| } | |||
| int GpuInfo::support_VK_KHR_shader_non_semantic_info() const /* Read-only accessor for the VK_KHR_shader_non_semantic_info support value held by the private implementation object d. */ | |||
| { | |||
| return d->support_VK_KHR_shader_non_semantic_info; /* In the visible query_extensions hunks this field is reset to 0 and set to exp.specVersion when the extension name matches. */ | |||
| } | |||
| int GpuInfo::support_VK_KHR_shader_subgroup_extended_types() const | |||
| { | |||
| return d->support_VK_KHR_shader_subgroup_extended_types; | |||
| @@ -2716,6 +2725,8 @@ VulkanDevice::VulkanDevice(int device_index) | |||
| enabledExtensions.push_back("VK_KHR_shader_float16_int8"); | |||
| if (info.support_VK_KHR_shader_float_controls()) | |||
| enabledExtensions.push_back("VK_KHR_shader_float_controls"); | |||
| if (info.support_VK_KHR_shader_non_semantic_info()) | |||
| enabledExtensions.push_back("VK_KHR_shader_non_semantic_info"); | |||
| if (info.support_VK_KHR_shader_subgroup_extended_types()) | |||
| enabledExtensions.push_back("VK_KHR_shader_subgroup_extended_types"); | |||
| if (info.support_VK_KHR_shader_subgroup_rotate()) | |||
| @@ -317,6 +317,7 @@ public: | |||
| int support_VK_KHR_sampler_ycbcr_conversion() const; | |||
| int support_VK_KHR_shader_float16_int8() const; | |||
| int support_VK_KHR_shader_float_controls() const; | |||
| int support_VK_KHR_shader_non_semantic_info() const; | |||
| int support_VK_KHR_shader_subgroup_extended_types() const; | |||
| int support_VK_KHR_shader_subgroup_rotate() const; | |||
| int support_VK_KHR_storage_buffer_storage_class() const; | |||
| @@ -202,13 +202,16 @@ void main() | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| if (gz2.y < psc(outc)) | |||
| { | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| } | |||
| #else | |||
| const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -202,13 +202,16 @@ void main() | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| if (gz2.y < psc(outc)) | |||
| { | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| } | |||
| #else | |||
| const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -220,13 +220,16 @@ void main() | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| if (gz2.y < psc(outc)) | |||
| { | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| } | |||
| #else | |||
| const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -233,13 +233,16 @@ void main() | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| if (gz2.y < psc(outc)) | |||
| { | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| } | |||
| #else | |||
| const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -202,13 +202,16 @@ void main() | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| if (gz2.y < psc(outc)) | |||
| { | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| } | |||
| #else | |||
| const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -348,13 +348,16 @@ void main() | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| if (gz2.y < psc(outc)) | |||
| { | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| } | |||
| #else | |||
| const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -348,13 +348,16 @@ void main() | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| if (gz2.y < psc(outc)) | |||
| { | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| } | |||
| #else | |||
| const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -204,13 +204,16 @@ void main() | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| if (gz2.y < psc(outc)) | |||
| { | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| } | |||
| #else | |||
| const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -264,13 +264,16 @@ void main() | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1); | |||
| if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3); | |||
| if (gz2.y < psc(outc)) | |||
| { | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4); | |||
| if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5); | |||
| if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6); | |||
| if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7); | |||
| } | |||
| #else | |||
| const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx; | |||