| @@ -328,6 +328,77 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER | |||
| ) | |||
| set_source_files_properties(${SHADER_image_fp16p_SPV_HEX_FILE} PROPERTIES GENERATED TRUE) | |||
| # image + fp16p + fp16a | |||
| set(SHADER_image_fp16pa_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16pa") | |||
| set(SHADER_image_fp16pa_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16pa_SRC_NAME_WE}.spv.hex.h) | |||
| # Build rule for the "image + fp16 packed storage + fp16 arithmetic" shader variant: | |||
| # runs glslangValidator on ${SHADER_SRC} with the macro set below and writes the | |||
| # result to ${SHADER_image_fp16pa_SPV_HEX_FILE}. The rule re-runs whenever | |||
| # ${SHADER_SRC} changes (see DEPENDS). | |||
| add_custom_command( | |||
| OUTPUT ${SHADER_image_fp16pa_SPV_HEX_FILE} | |||
| COMMAND ${GLSLANGVALIDATOR_EXECUTABLE} | |||
| # storage types (sfp*): half values are kept packed inside 32-bit uint words | |||
| ARGS -Dsfp=float -Dsfpvec2=uint -Dsfpvec4=uvec2 -Dsfpvec8=uvec4 | |||
| # arithmetic types (afp*): native 16-bit float scalar/vector/matrix types | |||
| -Dafp=float16_t -Dafpvec2=f16vec2 -Dafpvec4=f16vec4 -Dafpvec8=f16mat2x4 -Dafpmat4=f16mat4 | |||
| -Dimfmtc1=r32f -Dimfmtc4=rgba16f | |||
| -Dunfp=mediump | |||
| # image load / store / copy helpers for 1, 4 and 8 packed elements | |||
| "-D image1d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)" | |||
| "-D image2d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)" | |||
| "-D image3d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)" | |||
| "-D image1d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}" | |||
| "-D image2d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}" | |||
| "-D image3d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}" | |||
| "-D image1d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" | |||
| "-D image2d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" | |||
| "-D image3d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" | |||
| "-D image1d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))" | |||
| "-D image2d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))" | |||
| "-D image3d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))" | |||
| "-D image1d_st4(img,p,v)={imageStore(img,p,v);}" | |||
| "-D image2d_st4(img,p,v)={imageStore(img,p,v);}" | |||
| "-D image3d_st4(img,p,v)={imageStore(img,p,v);}" | |||
| "-D image1d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" | |||
| "-D image2d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" | |||
| "-D image3d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}" | |||
| "-D image1d_ld8(tex,p)=f16mat2x4(texelFetch(tex,p*2,0),texelFetch(tex,p*2+1,0))" | |||
| "-D image2d_ld8(tex,p)=f16mat2x4(texelFetch(tex,ivec2(p.x*2,p.y),0),texelFetch(tex,ivec2(p.x*2+1,p.y),0))" | |||
| "-D image3d_ld8(tex,p)=f16mat2x4(texelFetch(tex,ivec3(p.x*2,p.y,p.z),0),texelFetch(tex,ivec3(p.x*2+1,p.y,p.z),0))" | |||
| "-D image1d_st8(img,p,v)={imageStore(img,p*2,v[0]);imageStore(img,p*2+1,v[1]);}" | |||
| "-D image2d_st8(img,p,v)={imageStore(img,ivec2(p.x*2,p.y),v[0]);imageStore(img,ivec2(p.x*2+1,p.y),v[1]);}" | |||
| "-D image3d_st8(img,p,v)={imageStore(img,ivec3(p.x*2,p.y,p.z),v[0]);imageStore(img,ivec3(p.x*2+1,p.y,p.z),v[1]);}" | |||
| "-D image1d_cp8(img,p,tex,sp)={imageStore(img,p*2,texelFetch(tex,sp*2,0));imageStore(img,p*2+1,texelFetch(tex,sp*2+1,0));}" | |||
| "-D image2d_cp8(img,p,tex,sp)={imageStore(img,ivec2(p.x*2,p.y),texelFetch(tex,ivec2(sp.x*2,sp.y),0));imageStore(img,ivec2(p.x*2+1,p.y),texelFetch(tex,ivec2(sp.x*2+1,sp.y),0));}" | |||
| "-D image3d_cp8(img,p,tex,sp)={imageStore(img,ivec3(p.x*2,p.y,p.z),texelFetch(tex,ivec3(sp.x*2,sp.y,sp.z),0));imageStore(img,ivec3(p.x*2+1,p.y,p.z),texelFetch(tex,ivec3(sp.x*2+1,sp.y,sp.z),0));}" | |||
| # buffer load / store / copy helpers; packed halves go through packHalf2x16 / unpackHalf2x16 | |||
| "-D buffer_ld1(buf,i)=float16_t(buf[i])" | |||
| "-D buffer_st1(buf,i,v)={buf[i]=float(v);}" | |||
| "-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}" | |||
| "-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=uvec2(packHalf2x16(vec2(f16vec2(sbuf[si4.r],sbuf[si4.g]))),packHalf2x16(vec2(f16vec2(sbuf[si4.b],sbuf[si4.a]))));}" | |||
| "-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=uvec4(packHalf2x16(vec2(f16vec2(sbuf[si4.r],sbuf[si4.g]))),packHalf2x16(vec2(f16vec2(sbuf[si4.b],sbuf[si4.a]))),packHalf2x16(vec2(f16vec2(sbuf[sii4.r],sbuf[sii4.g]))),packHalf2x16(vec2(f16vec2(sbuf[sii4.b],sbuf[sii4.a]))));}" | |||
| "-D buffer_ld2(buf,i)=f16vec2(unpackHalf2x16(buf[i]))" | |||
| "-D buffer_st2(buf,i,v)={buf[i]=packHalf2x16(vec2(v));}" | |||
| "-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}" | |||
| "-D buffer_ld4(buf,i)=f16vec4(vec4(unpackHalf2x16(buf[i].x),unpackHalf2x16(buf[i].y)))" | |||
| "-D buffer_st4(buf,i,v)={buf[i]=uvec2(packHalf2x16(vec2(v.rg)),packHalf2x16(vec2(v.ba)));}" | |||
| "-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}" | |||
| "-D buffer_cp4to1(buf,i4,sbuf,si)={uvec2 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.x);vec2 _v1=unpackHalf2x16(_v.y); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g;}" | |||
| "-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=uvec4(sbuf[si2.r],sbuf[si2.g]);}" | |||
| "-D buffer_ld8(buf,i)=f16mat2x4(f16vec4(vec4(unpackHalf2x16(buf[i].r),unpackHalf2x16(buf[i].g))),f16vec4(vec4(unpackHalf2x16(buf[i].b),unpackHalf2x16(buf[i].a))))" | |||
| "-D buffer_st8(buf,i,v)={buf[i]=uvec4(uvec2(packHalf2x16(vec2(v[0].rg)),packHalf2x16(vec2(v[0].ba))),uvec2(packHalf2x16(vec2(v[1].rg)),packHalf2x16(vec2(v[1].ba))));}" | |||
| "-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}" | |||
| "-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={uvec4 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.r);vec2 _v1=unpackHalf2x16(_v.g);vec2 _v2=unpackHalf2x16(_v.b);vec2 _v3=unpackHalf2x16(_v.a); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g; buf[ii4.r]=_v2.r;buf[ii4.g]=_v2.g;buf[ii4.b]=_v3.r;buf[ii4.a]=_v3.g;}" | |||
| "-D buffer_cp8to4(buf,i2,sbuf,si)={uvec4 _v=sbuf[si]; buf[i2.r]=_v.rg;buf[i2.g]=_v.ba;}" | |||
| "-D psc(x)=(x==0?p.x:x)" | |||
| # feature switches tested by the shader sources via #if | |||
| -DNCNN_image_shader=1 -DNCNN_fp16_packed=1 -DNCNN_fp16_arithmetic=1 | |||
| -V -s -x -o ${SHADER_image_fp16pa_SPV_HEX_FILE} ${SHADER_SRC} | |||
| DEPENDS ${SHADER_SRC} | |||
| COMMENT "Building SPIR-V module ${SHADER_image_fp16pa_SRC_NAME_WE}.spv" | |||
| VERBATIM | |||
| ) | |||
| set_source_files_properties(${SHADER_image_fp16pa_SPV_HEX_FILE} PROPERTIES GENERATED TRUE) | |||
| # image + fp16s | |||
| set(SHADER_image_fp16s_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16s") | |||
| @@ -401,12 +472,12 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER | |||
| ) | |||
| set_source_files_properties(${SHADER_image_fp16s_SPV_HEX_FILE} PROPERTIES GENERATED TRUE) | |||
| # image + fp16a | |||
| set(SHADER_image_fp16a_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16a") | |||
| # image + fp16s + fp16a | |||
| set(SHADER_image_fp16sa_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16sa") | |||
| set(SHADER_image_fp16a_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16a_SRC_NAME_WE}.spv.hex.h) | |||
| set(SHADER_image_fp16sa_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16sa_SRC_NAME_WE}.spv.hex.h) | |||
| add_custom_command( | |||
| OUTPUT ${SHADER_image_fp16a_SPV_HEX_FILE} | |||
| OUTPUT ${SHADER_image_fp16sa_SPV_HEX_FILE} | |||
| COMMAND ${GLSLANGVALIDATOR_EXECUTABLE} | |||
| ARGS -Dsfp=float16_t -Dsfpvec2=f16vec2 -Dsfpvec4=f16vec4 -Dsfpvec8=f16mat2x4 -Dsfpmat4=f16mat4 | |||
| -Dafp=float16_t -Dafpvec2=f16vec2 -Dafpvec4=f16vec4 -Dafpvec8=f16mat2x4 -Dafpmat4=f16mat4 | |||
| @@ -467,12 +538,12 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER | |||
| "-D psc(x)=(x==0?p.x:x)" | |||
| -DNCNN_image_shader=1 -DNCNN_fp16_storage=1 -DNCNN_fp16_arithmetic=1 | |||
| -V -s -x -o ${SHADER_image_fp16a_SPV_HEX_FILE} ${SHADER_SRC} | |||
| -V -s -x -o ${SHADER_image_fp16sa_SPV_HEX_FILE} ${SHADER_SRC} | |||
| DEPENDS ${SHADER_SRC} | |||
| COMMENT "Building SPIR-V module ${SHADER_image_fp16a_SRC_NAME_WE}.spv" | |||
| COMMENT "Building SPIR-V module ${SHADER_image_fp16sa_SRC_NAME_WE}.spv" | |||
| VERBATIM | |||
| ) | |||
| set_source_files_properties(${SHADER_image_fp16a_SPV_HEX_FILE} PROPERTIES GENERATED TRUE) | |||
| set_source_files_properties(${SHADER_image_fp16sa_SPV_HEX_FILE} PROPERTIES GENERATED TRUE) | |||
| set(LOCAL_SHADER_SPV_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.spv.h) | |||
| @@ -484,8 +555,9 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER | |||
| "static const uint32_t ${SHADER_fp16sa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16sa_SRC_NAME_WE}.spv.hex.h\"\n};\n" | |||
| "static const uint32_t ${SHADER_image_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_SRC_NAME_WE}.spv.hex.h\"\n};\n" | |||
| "static const uint32_t ${SHADER_image_fp16p_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16p_SRC_NAME_WE}.spv.hex.h\"\n};\n" | |||
| "static const uint32_t ${SHADER_image_fp16pa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16pa_SRC_NAME_WE}.spv.hex.h\"\n};\n" | |||
| "static const uint32_t ${SHADER_image_fp16s_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16s_SRC_NAME_WE}.spv.hex.h\"\n};\n" | |||
| "static const uint32_t ${SHADER_image_fp16a_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16a_SRC_NAME_WE}.spv.hex.h\"\n};\n" | |||
| "static const uint32_t ${SHADER_image_fp16sa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16sa_SRC_NAME_WE}.spv.hex.h\"\n};\n" | |||
| ) | |||
| set_source_files_properties(${LOCAL_SHADER_SPV_HEADER} PROPERTIES GENERATED TRUE) | |||
| @@ -498,8 +570,9 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER | |||
| ${SHADER_fp16sa_SPV_HEX_FILE} | |||
| ${SHADER_image_SPV_HEX_FILE} | |||
| ${SHADER_image_fp16p_SPV_HEX_FILE} | |||
| ${SHADER_image_fp16pa_SPV_HEX_FILE} | |||
| ${SHADER_image_fp16s_SPV_HEX_FILE} | |||
| ${SHADER_image_fp16a_SPV_HEX_FILE} | |||
| ${SHADER_image_fp16sa_SPV_HEX_FILE} | |||
| ) | |||
| set(${SHADER_SPV_HEADER} ${LOCAL_SHADER_SPV_HEADER} PARENT_SCOPE) | |||
| @@ -62,8 +62,9 @@ macro(ncnn_add_shader SHADER_SRC) | |||
| string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_fp16sa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_fp16sa_spv_data)},\n") | |||
| string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_spv_data)},\n") | |||
| string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16p_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16p_spv_data)},\n") | |||
| string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16pa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16pa_spv_data)},\n") | |||
| string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16s_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16s_spv_data)},\n") | |||
| string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16a_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16a_spv_data)},\n") | |||
| string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16sa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16sa_spv_data)},\n") | |||
| list(APPEND SHADER_SPV_HEX_FILES ${SHADER_SPV_HEADER}) | |||
| list(APPEND SHADER_SPV_HEX_FILES ${SHADER_SPV_HEX_HEADERS}) | |||
| @@ -83,9 +84,11 @@ macro(ncnn_add_shader SHADER_SRC) | |||
| math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1") | |||
| set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16p = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n") | |||
| math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1") | |||
| set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16pa = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n") | |||
| math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1") | |||
| set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16s = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n") | |||
| math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1") | |||
| set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16a = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n") | |||
| set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16sa = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n") | |||
| math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1") | |||
| endmacro() | |||
| @@ -1613,54 +1613,61 @@ int VulkanDevice::create_shader_module() | |||
| // 4 = fp16sa | |||
| // 5 = image | |||
| // 6 = image_fp16p | |||
| // 7 = image_fp16s | |||
| // 8 = image_fp16a | |||
| // 7 = image_fp16pa | |||
| // 8 = image_fp16s | |||
| // 9 = image_fp16sa | |||
| if (!info.support_fp16_packed) | |||
| { | |||
| if (i % 9 == 1) | |||
| if (i % 10 == 1) | |||
| continue; | |||
| } | |||
| if (!info.support_fp16_packed || !info.support_fp16_arithmetic) | |||
| { | |||
| if (i % 9 == 2) | |||
| if (i % 10 == 2) | |||
| continue; | |||
| } | |||
| if (!info.support_fp16_storage) | |||
| { | |||
| if (i % 9 == 3) | |||
| if (i % 10 == 3) | |||
| continue; | |||
| } | |||
| if (!info.support_fp16_storage || !info.support_fp16_arithmetic) | |||
| { | |||
| if (i % 9 == 4) | |||
| if (i % 10 == 4) | |||
| continue; | |||
| } | |||
| // if (!info.support_image_storage) | |||
| // { | |||
| // if (i % 9 == 5) | |||
| // if (i % 10 == 5) | |||
| // continue; | |||
| // } | |||
| if (!info.support_fp16_packed) | |||
| { | |||
| if (i % 9 == 6) | |||
| if (i % 10 == 6) | |||
| continue; | |||
| } | |||
| if (!info.support_fp16_packed || !info.support_fp16_arithmetic) | |||
| { | |||
| if (i % 10 == 7) | |||
| continue; | |||
| } | |||
| if (!info.support_fp16_storage) | |||
| { | |||
| if (i % 9 == 7) | |||
| if (i % 10 == 8) | |||
| continue; | |||
| } | |||
| if (!info.support_fp16_storage || !info.support_fp16_arithmetic) | |||
| { | |||
| if (i % 9 == 8) | |||
| if (i % 10 == 9) | |||
| continue; | |||
| } | |||
| @@ -97,6 +97,7 @@ void main() | |||
| afpvec4 v7 = image3d_ld4(bottom_blob, ivec3(x, y, zz4.a / 4)); | |||
| afpvec8 v; | |||
| // TODO bugihfa | |||
| v[0].r = v0[z4.r % 4]; | |||
| v[0].g = v1[z4.g % 4]; | |||
| v[0].b = v2[z4.b % 4]; | |||
| @@ -92,6 +92,7 @@ void main() | |||
| afpvec8 v3 = image3d_ld8(bottom_blob, ivec3(x, y, z4.a / 8)); | |||
| afpvec4 v; | |||
| // TODO bugihfa | |||
| v.r = v0[(z4.r % 8) / 4][z4.r % 4]; | |||
| v.g = v1[(z4.g % 8) / 4][z4.g % 4]; | |||
| v.b = v2[(z4.b % 8) / 4][z4.b % 4]; | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -16,10 +16,8 @@ | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #if !NCNN_fp16_arithmetic | |||
| struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| @@ -98,6 +98,7 @@ void main() | |||
| ivec4 lane4 = z4 % 4; | |||
| // TODO bugihfa | |||
| afpvec4 v = afpvec4(vr[lane4.r], vg[lane4.g], vb[lane4.b], va[lane4.a]); | |||
| #if NCNN_image_shader | |||
| @@ -117,6 +117,7 @@ void main() | |||
| ivec4 lane4 = z4 % 4; | |||
| ivec4 lane8 = zz4 % 4; | |||
| // TODO bugihfa | |||
| afpvec8 v = afpvec8(vr[sz4.r][lane4.r], vg[sz4.g][lane4.g], vb[sz4.b][lane4.b], va[sz4.a][lane4.a], vvr[szz4.r][lane8.r], vvg[szz4.g][lane8.g], vvb[szz4.b][lane8.b], vva[szz4.a][lane8.a]); | |||
| #if NCNN_image_shader | |||
| @@ -89,18 +89,23 @@ int Pipeline::create(int shader_type_index, const Option& opt, const std::vector | |||
| // 4 = fp16sa | |||
| // 5 = image | |||
| // 6 = image_fp16p | |||
| // 7 = image_fp16s | |||
| // 8 = image_fp16a | |||
| // 7 = image_fp16pa | |||
| // 8 = image_fp16s | |||
| // 9 = image_fp16sa | |||
| if (opt.use_image_storage && opt.use_fp16_storage && opt.use_fp16_arithmetic) | |||
| if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic) | |||
| { | |||
| shader_type_index += 8; | |||
| shader_type_index += 9; | |||
| } | |||
| else if (opt.use_image_storage && opt.use_fp16_storage) | |||
| else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic) | |||
| { | |||
| shader_type_index += 7; | |||
| } | |||
| else if (opt.use_image_storage && opt.use_fp16_packed) | |||
| else if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage) | |||
| { | |||
| shader_type_index += 8; | |||
| } | |||
| else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed) | |||
| { | |||
| shader_type_index += 6; | |||
| } | |||
| @@ -595,18 +600,23 @@ int ImportAndroidHardwareBufferPipeline::create(VkAndroidHardwareBufferImageAllo | |||
| // 4 = fp16sa | |||
| // 5 = image | |||
| // 6 = image_fp16p | |||
| // 7 = image_fp16s | |||
| // 8 = image_fp16a | |||
| // 7 = image_fp16pa | |||
| // 8 = image_fp16s | |||
| // 9 = image_fp16sa | |||
| if (opt.use_image_storage && opt.use_fp16_storage && opt.use_fp16_arithmetic) | |||
| if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic) | |||
| { | |||
| shader_type_index += 8; | |||
| shader_type_index += 9; | |||
| } | |||
| else if (opt.use_image_storage && opt.use_fp16_storage) | |||
| else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic) | |||
| { | |||
| shader_type_index += 7; | |||
| } | |||
| else if (opt.use_image_storage && opt.use_fp16_packed) | |||
| else if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage) | |||
| { | |||
| shader_type_index += 8; | |||
| } | |||
| else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed) | |||
| { | |||
| shader_type_index += 6; | |||
| } | |||