From 6682cd16383a9f42929bc4e80e9f877017f5a67a Mon Sep 17 00:00:00 2001 From: nihuini Date: Tue, 12 May 2020 16:20:23 +0800 Subject: [PATCH] image fp16pa, mark some bugihfa todo --- cmake/ncnn_generate_shader_spv_header.cmake | 91 +++++++++++++++++-- src/CMakeLists.txt | 7 +- src/gpu.cpp | 27 ++++-- src/layer/vulkan/shader/crop_pack4to8.comp | 1 + src/layer/vulkan/shader/crop_pack8to4.comp | 1 + src/layer/vulkan/shader/packing_pack1to8.comp | 2 - .../shader/packing_pack1to8_fp16_to_fp32.comp | 2 - .../shader/packing_pack1to8_fp32_to_fp16.comp | 2 - src/layer/vulkan/shader/packing_pack8.comp | 2 - .../shader/packing_pack8_fp16_to_fp32.comp | 2 - .../shader/packing_pack8_fp32_to_fp16.comp | 2 - src/layer/vulkan/shader/packing_pack8to1.comp | 2 - .../shader/packing_pack8to1_fp16_to_fp32.comp | 2 - .../shader/packing_pack8to1_fp32_to_fp16.comp | 2 - src/layer/vulkan/shader/packing_pack8to4.comp | 2 - .../shader/packing_pack8to4_fp16_to_fp32.comp | 2 - .../shader/packing_pack8to4_fp32_to_fp16.comp | 2 - .../vulkan/shader/shufflechannel_pack4.comp | 1 + .../vulkan/shader/shufflechannel_pack8.comp | 1 + src/pipeline.cpp | 34 ++++--- 20 files changed, 130 insertions(+), 57 deletions(-) diff --git a/cmake/ncnn_generate_shader_spv_header.cmake b/cmake/ncnn_generate_shader_spv_header.cmake index 6e43eedfc..5a98d0637 100644 --- a/cmake/ncnn_generate_shader_spv_header.cmake +++ b/cmake/ncnn_generate_shader_spv_header.cmake @@ -328,6 +328,77 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER ) set_source_files_properties(${SHADER_image_fp16p_SPV_HEX_FILE} PROPERTIES GENERATED TRUE) + # image + fp16p + fp16a + set(SHADER_image_fp16pa_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16pa") + + set(SHADER_image_fp16pa_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16pa_SRC_NAME_WE}.spv.hex.h) + add_custom_command( + OUTPUT ${SHADER_image_fp16pa_SPV_HEX_FILE} + COMMAND ${GLSLANGVALIDATOR_EXECUTABLE} + ARGS -Dsfp=float -Dsfpvec2=uint 
-Dsfpvec4=uvec2 -Dsfpvec8=uvec4 + -Dafp=float16_t -Dafpvec2=f16vec2 -Dafpvec4=f16vec4 -Dafpvec8=f16mat2x4 -Dafpmat4=f16mat4 + + -Dimfmtc1=r32f -Dimfmtc4=rgba16f + -Dunfp=mediump + + "-D image1d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)" + "-D image2d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)" + "-D image3d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)" + "-D image1d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}" + "-D image2d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}" + "-D image3d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}" + "-D image1d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" + "-D image2d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" + "-D image3d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" + + "-D image1d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))" + "-D image2d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))" + "-D image3d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))" + "-D image1d_st4(img,p,v)={imageStore(img,p,v);}" + "-D image2d_st4(img,p,v)={imageStore(img,p,v);}" + "-D image3d_st4(img,p,v)={imageStore(img,p,v);}" + "-D image1d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" + "-D image2d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" + "-D image3d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" + + "-D image1d_ld8(tex,p)=f16mat2x4(texelFetch(tex,p*2,0),texelFetch(tex,p*2+1,0))" + "-D image2d_ld8(tex,p)=f16mat2x4(texelFetch(tex,ivec2(p.x*2,p.y),0),texelFetch(tex,ivec2(p.x*2+1,p.y),0))" + "-D image3d_ld8(tex,p)=f16mat2x4(texelFetch(tex,ivec3(p.x*2,p.y,p.z),0),texelFetch(tex,ivec3(p.x*2+1,p.y,p.z),0))" + "-D image1d_st8(img,p,v)={imageStore(img,p*2,v[0]);imageStore(img,p*2+1,v[1]);}" + "-D image2d_st8(img,p,v)={imageStore(img,ivec2(p.x*2,p.y),v[0]);imageStore(img,ivec2(p.x*2+1,p.y),v[1]);}" + "-D image3d_st8(img,p,v)={imageStore(img,ivec3(p.x*2,p.y,p.z),v[0]);imageStore(img,ivec3(p.x*2+1,p.y,p.z),v[1]);}" + "-D 
image1d_cp8(img,p,tex,sp)={imageStore(img,p*2,texelFetch(tex,sp*2,0));imageStore(img,p*2+1,texelFetch(tex,sp*2+1,0));}" + "-D image2d_cp8(img,p,tex,sp)={imageStore(img,ivec2(p.x*2,p.y),texelFetch(tex,ivec2(sp.x*2,sp.y),0));imageStore(img,ivec2(p.x*2+1,p.y),texelFetch(tex,ivec2(sp.x*2+1,sp.y),0));}" + "-D image3d_cp8(img,p,tex,sp)={imageStore(img,ivec3(p.x*2,p.y,p.z),texelFetch(tex,ivec3(sp.x*2,sp.y,sp.z),0));imageStore(img,ivec3(p.x*2+1,p.y,p.z),texelFetch(tex,ivec3(sp.x*2+1,sp.y,sp.z),0));}" + + "-D buffer_ld1(buf,i)=float16_t(buf[i])" + "-D buffer_st1(buf,i,v)={buf[i]=float(v);}" + "-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}" + "-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=uvec2(packHalf2x16(vec2(f16vec2(sbuf[si4.r],sbuf[si4.g]))),packHalf2x16(vec2(f16vec2(sbuf[si4.b],sbuf[si4.a]))));}" + "-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=uvec4(packHalf2x16(vec2(f16vec2(sbuf[si4.r],sbuf[si4.g]))),packHalf2x16(vec2(f16vec2(sbuf[si4.b],sbuf[si4.a]))),packHalf2x16(vec2(f16vec2(sbuf[sii4.r],sbuf[sii4.g]))),packHalf2x16(vec2(f16vec2(sbuf[sii4.b],sbuf[sii4.a]))));}" + "-D buffer_ld2(buf,i)=f16vec2(unpackHalf2x16(buf[i]))" + "-D buffer_st2(buf,i,v)={buf[i]=packHalf2x16(vec2(v));}" + "-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}" + "-D buffer_ld4(buf,i)=f16vec4(vec4(unpackHalf2x16(buf[i].x),unpackHalf2x16(buf[i].y)))" + "-D buffer_st4(buf,i,v)={buf[i]=uvec2(packHalf2x16(vec2(v.rg)),packHalf2x16(vec2(v.ba)));}" + "-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}" + "-D buffer_cp4to1(buf,i4,sbuf,si)={uvec2 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.x);vec2 _v1=unpackHalf2x16(_v.y); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g;}" + "-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=uvec4(sbuf[si2.r],sbuf[si2.g]);}" + "-D buffer_ld8(buf,i)=f16mat2x4(f16vec4(vec4(unpackHalf2x16(buf[i].r),unpackHalf2x16(buf[i].g))),f16vec4(vec4(unpackHalf2x16(buf[i].b),unpackHalf2x16(buf[i].a))))" + "-D 
buffer_st8(buf,i,v)={buf[i]=uvec4(uvec2(packHalf2x16(vec2(v[0].rg)),packHalf2x16(vec2(v[0].ba))),uvec2(packHalf2x16(vec2(v[1].rg)),packHalf2x16(vec2(v[1].ba))));}" + "-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}" + "-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={uvec4 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.r);vec2 _v1=unpackHalf2x16(_v.g);vec2 _v2=unpackHalf2x16(_v.b);vec2 _v3=unpackHalf2x16(_v.a); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g; buf[ii4.r]=_v2.r;buf[ii4.g]=_v2.g;buf[ii4.b]=_v3.r;buf[ii4.a]=_v3.g;}" + "-D buffer_cp8to4(buf,i2,sbuf,si)={uvec4 _v=sbuf[si]; buf[i2.r]=_v.rg;buf[i2.g]=_v.ba;}" + + "-D psc(x)=(x==0?p.x:x)" + -DNCNN_image_shader=1 -DNCNN_fp16_packed=1 -DNCNN_fp16_arithmetic=1 + -V -s -x -o ${SHADER_image_fp16pa_SPV_HEX_FILE} ${SHADER_SRC} + DEPENDS ${SHADER_SRC} + COMMENT "Building SPIR-V module ${SHADER_image_fp16pa_SRC_NAME_WE}.spv" + VERBATIM + ) + set_source_files_properties(${SHADER_image_fp16pa_SPV_HEX_FILE} PROPERTIES GENERATED TRUE) + # image + fp16s set(SHADER_image_fp16s_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16s") @@ -401,12 +472,12 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER ) set_source_files_properties(${SHADER_image_fp16s_SPV_HEX_FILE} PROPERTIES GENERATED TRUE) - # image + fp16a - set(SHADER_image_fp16a_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16a") + # image + fp16s + fp16a + set(SHADER_image_fp16sa_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16sa") - set(SHADER_image_fp16a_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16a_SRC_NAME_WE}.spv.hex.h) + set(SHADER_image_fp16sa_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16sa_SRC_NAME_WE}.spv.hex.h) add_custom_command( - OUTPUT ${SHADER_image_fp16a_SPV_HEX_FILE} + OUTPUT ${SHADER_image_fp16sa_SPV_HEX_FILE} COMMAND ${GLSLANGVALIDATOR_EXECUTABLE} ARGS -Dsfp=float16_t -Dsfpvec2=f16vec2 -Dsfpvec4=f16vec4 -Dsfpvec8=f16mat2x4 -Dsfpmat4=f16mat4 -Dafp=float16_t -Dafpvec2=f16vec2 -Dafpvec4=f16vec4 
-Dafpvec8=f16mat2x4 -Dafpmat4=f16mat4 @@ -467,12 +538,12 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER "-D psc(x)=(x==0?p.x:x)" -DNCNN_image_shader=1 -DNCNN_fp16_storage=1 -DNCNN_fp16_arithmetic=1 - -V -s -x -o ${SHADER_image_fp16a_SPV_HEX_FILE} ${SHADER_SRC} + -V -s -x -o ${SHADER_image_fp16sa_SPV_HEX_FILE} ${SHADER_SRC} DEPENDS ${SHADER_SRC} - COMMENT "Building SPIR-V module ${SHADER_image_fp16a_SRC_NAME_WE}.spv" + COMMENT "Building SPIR-V module ${SHADER_image_fp16sa_SRC_NAME_WE}.spv" VERBATIM ) - set_source_files_properties(${SHADER_image_fp16a_SPV_HEX_FILE} PROPERTIES GENERATED TRUE) + set_source_files_properties(${SHADER_image_fp16sa_SPV_HEX_FILE} PROPERTIES GENERATED TRUE) set(LOCAL_SHADER_SPV_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.spv.h) @@ -484,8 +555,9 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER "static const uint32_t ${SHADER_fp16sa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16sa_SRC_NAME_WE}.spv.hex.h\"\n};\n" "static const uint32_t ${SHADER_image_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_SRC_NAME_WE}.spv.hex.h\"\n};\n" "static const uint32_t ${SHADER_image_fp16p_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16p_SRC_NAME_WE}.spv.hex.h\"\n};\n" + "static const uint32_t ${SHADER_image_fp16pa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16pa_SRC_NAME_WE}.spv.hex.h\"\n};\n" "static const uint32_t ${SHADER_image_fp16s_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16s_SRC_NAME_WE}.spv.hex.h\"\n};\n" - "static const uint32_t ${SHADER_image_fp16a_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16a_SRC_NAME_WE}.spv.hex.h\"\n};\n" + "static const uint32_t ${SHADER_image_fp16sa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16sa_SRC_NAME_WE}.spv.hex.h\"\n};\n" ) set_source_files_properties(${LOCAL_SHADER_SPV_HEADER} PROPERTIES GENERATED TRUE) @@ -498,8 +570,9 @@ 
function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER ${SHADER_fp16sa_SPV_HEX_FILE} ${SHADER_image_SPV_HEX_FILE} ${SHADER_image_fp16p_SPV_HEX_FILE} + ${SHADER_image_fp16pa_SPV_HEX_FILE} ${SHADER_image_fp16s_SPV_HEX_FILE} - ${SHADER_image_fp16a_SPV_HEX_FILE} + ${SHADER_image_fp16sa_SPV_HEX_FILE} ) set(${SHADER_SPV_HEADER} ${LOCAL_SHADER_SPV_HEADER} PARENT_SCOPE) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e33223761..4615c11ae 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,8 +62,9 @@ macro(ncnn_add_shader SHADER_SRC) string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_fp16sa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_fp16sa_spv_data)},\n") string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_spv_data)},\n") string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16p_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16p_spv_data)},\n") + string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16pa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16pa_spv_data)},\n") string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16s_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16s_spv_data)},\n") - string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16a_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16a_spv_data)},\n") + string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16sa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16sa_spv_data)},\n") list(APPEND SHADER_SPV_HEX_FILES ${SHADER_SPV_HEADER}) list(APPEND SHADER_SPV_HEX_FILES ${SHADER_SPV_HEX_HEADERS}) @@ -83,9 +84,11 @@ macro(ncnn_add_shader SHADER_SRC) math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1") set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16p = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n") math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1") + 
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16pa = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n") + math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1") set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16s = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n") math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1") - set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16a = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n") + set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16sa = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n") math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1") endmacro() diff --git a/src/gpu.cpp b/src/gpu.cpp index 4ad609b89..f5f766694 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -1613,54 +1613,61 @@ int VulkanDevice::create_shader_module() // 4 = fp16sa // 5 = image // 6 = image_fp16p - // 7 = image_fp16s - // 8 = image_fp16a + // 7 = image_fp16pa + // 8 = image_fp16s + // 9 = image_fp16sa if (!info.support_fp16_packed) { - if (i % 9 == 1) + if (i % 10 == 1) continue; } if (!info.support_fp16_packed || !info.support_fp16_arithmetic) { - if (i % 9 == 2) + if (i % 10 == 2) continue; } if (!info.support_fp16_storage) { - if (i % 9 == 3) + if (i % 10 == 3) continue; } if (!info.support_fp16_storage || !info.support_fp16_arithmetic) { - if (i % 9 == 4) + if (i % 10 == 4) continue; } // if (!info.support_image_storage) // { -// if (i % 9 == 5) +// if (i % 10 == 5) // continue; // } if (!info.support_fp16_packed) { - if (i % 9 == 6) + if (i % 10 == 6) + continue; + } + + if (!info.support_fp16_packed || !info.support_fp16_arithmetic) + { + if (i % 10 == 7) continue; } if (!info.support_fp16_storage) { - if (i % 9 == 7) + if (i % 10 == 8) continue; } if (!info.support_fp16_storage || !info.support_fp16_arithmetic) { - if (i % 9 == 8) + if (i % 10 == 9) continue; } diff --git 
a/src/layer/vulkan/shader/crop_pack4to8.comp b/src/layer/vulkan/shader/crop_pack4to8.comp index 5635208a1..e7e134929 100644 --- a/src/layer/vulkan/shader/crop_pack4to8.comp +++ b/src/layer/vulkan/shader/crop_pack4to8.comp @@ -97,6 +97,7 @@ void main() afpvec4 v7 = image3d_ld4(bottom_blob, ivec3(x, y, zz4.a / 4)); afpvec8 v; + // TODO bugihfa v[0].r = v0[z4.r % 4]; v[0].g = v1[z4.g % 4]; v[0].b = v2[z4.b % 4]; diff --git a/src/layer/vulkan/shader/crop_pack8to4.comp b/src/layer/vulkan/shader/crop_pack8to4.comp index c3b82b5ea..bc40310eb 100644 --- a/src/layer/vulkan/shader/crop_pack8to4.comp +++ b/src/layer/vulkan/shader/crop_pack8to4.comp @@ -92,6 +92,7 @@ void main() afpvec8 v3 = image3d_ld8(bottom_blob, ivec3(x, y, z4.a / 8)); afpvec4 v; + // TODO bugihfa v.r = v0[(z4.r % 8) / 4][z4.r % 4]; v.g = v1[(z4.g % 8) / 4][z4.g % 4]; v.b = v2[(z4.b % 8) / 4][z4.b % 4]; diff --git a/src/layer/vulkan/shader/packing_pack1to8.comp b/src/layer/vulkan/shader/packing_pack1to8.comp index c9e0cc1e7..a97fbe923 100644 --- a/src/layer/vulkan/shader/packing_pack1to8.comp +++ b/src/layer/vulkan/shader/packing_pack1to8.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/packing_pack1to8_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack1to8_fp16_to_fp32.comp index 72c0e998c..62a980788 100644 --- a/src/layer/vulkan/shader/packing_pack1to8_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack1to8_fp16_to_fp32.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git 
a/src/layer/vulkan/shader/packing_pack1to8_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack1to8_fp32_to_fp16.comp index 2ba143579..6b3a405e7 100644 --- a/src/layer/vulkan/shader/packing_pack1to8_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack1to8_fp32_to_fp16.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/packing_pack8.comp b/src/layer/vulkan/shader/packing_pack8.comp index c07597764..5b53e5b55 100644 --- a/src/layer/vulkan/shader/packing_pack8.comp +++ b/src/layer/vulkan/shader/packing_pack8.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/packing_pack8_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack8_fp16_to_fp32.comp index 88d5c8e9d..9576e59a6 100644 --- a/src/layer/vulkan/shader/packing_pack8_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack8_fp16_to_fp32.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/packing_pack8_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack8_fp32_to_fp16.comp index 29f140016..b78422346 100644 --- a/src/layer/vulkan/shader/packing_pack8_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack8_fp32_to_fp16.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension 
GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/packing_pack8to1.comp b/src/layer/vulkan/shader/packing_pack8to1.comp index 22e5a996e..6eed4ce56 100644 --- a/src/layer/vulkan/shader/packing_pack8to1.comp +++ b/src/layer/vulkan/shader/packing_pack8to1.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/packing_pack8to1_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack8to1_fp16_to_fp32.comp index 81eb42bbe..f670c5443 100644 --- a/src/layer/vulkan/shader/packing_pack8to1_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack8to1_fp16_to_fp32.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/packing_pack8to1_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack8to1_fp32_to_fp16.comp index ef8adb326..8c162f0f3 100644 --- a/src/layer/vulkan/shader/packing_pack8to1_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack8to1_fp32_to_fp16.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/packing_pack8to4.comp 
b/src/layer/vulkan/shader/packing_pack8to4.comp index 4ca709005..4a61fb77e 100644 --- a/src/layer/vulkan/shader/packing_pack8to4.comp +++ b/src/layer/vulkan/shader/packing_pack8to4.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/packing_pack8to4_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack8to4_fp16_to_fp32.comp index c78e3dbf1..564356caa 100644 --- a/src/layer/vulkan/shader/packing_pack8to4_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack8to4_fp16_to_fp32.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/packing_pack8to4_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack8to4_fp32_to_fp16.comp index 1fa5f072a..762977406 100644 --- a/src/layer/vulkan/shader/packing_pack8to4_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack8to4_fp32_to_fp16.comp @@ -16,10 +16,8 @@ #if NCNN_fp16_storage #extension GL_EXT_shader_16bit_storage: require -#if !NCNN_fp16_arithmetic struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif -#endif #if NCNN_fp16_arithmetic #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif diff --git a/src/layer/vulkan/shader/shufflechannel_pack4.comp b/src/layer/vulkan/shader/shufflechannel_pack4.comp index 0aed8308a..f931b7b64 100644 --- a/src/layer/vulkan/shader/shufflechannel_pack4.comp +++ b/src/layer/vulkan/shader/shufflechannel_pack4.comp @@ -98,6 +98,7 @@ void main() ivec4 lane4 = z4 % 4; + // TODO bugihfa afpvec4 v = afpvec4(vr[lane4.r], vg[lane4.g], 
vb[lane4.b], va[lane4.a]); #if NCNN_image_shader diff --git a/src/layer/vulkan/shader/shufflechannel_pack8.comp b/src/layer/vulkan/shader/shufflechannel_pack8.comp index ea03ea121..fab4c482c 100644 --- a/src/layer/vulkan/shader/shufflechannel_pack8.comp +++ b/src/layer/vulkan/shader/shufflechannel_pack8.comp @@ -117,6 +117,7 @@ void main() ivec4 lane4 = z4 % 4; ivec4 lane8 = zz4 % 4; + // TODO bugihfa afpvec8 v = afpvec8(vr[sz4.r][lane4.r], vg[sz4.g][lane4.g], vb[sz4.b][lane4.b], va[sz4.a][lane4.a], vvr[szz4.r][lane8.r], vvg[szz4.g][lane8.g], vvb[szz4.b][lane8.b], vva[szz4.a][lane8.a]); #if NCNN_image_shader diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 77278b53a..6c8696aa8 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -89,18 +89,23 @@ int Pipeline::create(int shader_type_index, const Option& opt, const std::vector // 4 = fp16sa // 5 = image // 6 = image_fp16p - // 7 = image_fp16s - // 8 = image_fp16a + // 7 = image_fp16pa + // 8 = image_fp16s + // 9 = image_fp16sa - if (opt.use_image_storage && opt.use_fp16_storage && opt.use_fp16_arithmetic) + if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic) { - shader_type_index += 8; + shader_type_index += 9; } - else if (opt.use_image_storage && opt.use_fp16_storage) + else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic) { shader_type_index += 7; } - else if (opt.use_image_storage && opt.use_fp16_packed) + else if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage) + { + shader_type_index += 8; + } + else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed) { shader_type_index += 6; } @@ -595,18 +600,23 @@ int ImportAndroidHardwareBufferPipeline::create(VkAndroidHardwareBufferImageAllo // 4 = fp16sa // 5 = image // 6 = image_fp16p - // 7 = 
image_fp16s - // 8 = image_fp16a + // 7 = image_fp16pa + // 8 = image_fp16s + // 9 = image_fp16sa - if (opt.use_image_storage && opt.use_fp16_storage && opt.use_fp16_arithmetic) + if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic) { - shader_type_index += 8; + shader_type_index += 9; } - else if (opt.use_image_storage && opt.use_fp16_storage) + else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic) { shader_type_index += 7; } - else if (opt.use_image_storage && opt.use_fp16_packed) + else if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage) + { + shader_type_index += 8; + } + else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed) { shader_type_index += 6; }