Browse Source

image fp16pa, mark some bugihfa todo

tags/20200616
nihuini 6 years ago
parent
commit
6682cd1638
20 changed files with 130 additions and 57 deletions
  1. +82
    -9
      cmake/ncnn_generate_shader_spv_header.cmake
  2. +5
    -2
      src/CMakeLists.txt
  3. +17
    -10
      src/gpu.cpp
  4. +1
    -0
      src/layer/vulkan/shader/crop_pack4to8.comp
  5. +1
    -0
      src/layer/vulkan/shader/crop_pack8to4.comp
  6. +0
    -2
      src/layer/vulkan/shader/packing_pack1to8.comp
  7. +0
    -2
      src/layer/vulkan/shader/packing_pack1to8_fp16_to_fp32.comp
  8. +0
    -2
      src/layer/vulkan/shader/packing_pack1to8_fp32_to_fp16.comp
  9. +0
    -2
      src/layer/vulkan/shader/packing_pack8.comp
  10. +0
    -2
      src/layer/vulkan/shader/packing_pack8_fp16_to_fp32.comp
  11. +0
    -2
      src/layer/vulkan/shader/packing_pack8_fp32_to_fp16.comp
  12. +0
    -2
      src/layer/vulkan/shader/packing_pack8to1.comp
  13. +0
    -2
      src/layer/vulkan/shader/packing_pack8to1_fp16_to_fp32.comp
  14. +0
    -2
      src/layer/vulkan/shader/packing_pack8to1_fp32_to_fp16.comp
  15. +0
    -2
      src/layer/vulkan/shader/packing_pack8to4.comp
  16. +0
    -2
      src/layer/vulkan/shader/packing_pack8to4_fp16_to_fp32.comp
  17. +0
    -2
      src/layer/vulkan/shader/packing_pack8to4_fp32_to_fp16.comp
  18. +1
    -0
      src/layer/vulkan/shader/shufflechannel_pack4.comp
  19. +1
    -0
      src/layer/vulkan/shader/shufflechannel_pack8.comp
  20. +22
    -12
      src/pipeline.cpp

+ 82
- 9
cmake/ncnn_generate_shader_spv_header.cmake View File

@@ -328,6 +328,77 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER
)
set_source_files_properties(${SHADER_image_fp16p_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)

# image + fp16p + fp16a
#
# Build rule for the shader variant compiled with NCNN_image_shader,
# NCNN_fp16_packed and NCNN_fp16_arithmetic all defined to 1.
#   - storage types (sfp*) are float / uint / uvec2 / uvec4, i.e. half values
#     are kept packed in 32-bit words and converted with packHalf2x16 /
#     unpackHalf2x16 in the buffer_* macros below;
#   - arithmetic types (afp*) are the float16 types (float16_t, f16vec*, f16mat*).
set(SHADER_image_fp16pa_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16pa")

# Output of the rule: the compiled module as a text hex dump in the current binary dir.
set(SHADER_image_fp16pa_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16pa_SRC_NAME_WE}.spv.hex.h)
add_custom_command(
    OUTPUT ${SHADER_image_fp16pa_SPV_HEX_FILE}
    COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
    ARGS -Dsfp=float -Dsfpvec2=uint -Dsfpvec4=uvec2 -Dsfpvec8=uvec4
    -Dafp=float16_t -Dafpvec2=f16vec2 -Dafpvec4=f16vec4 -Dafpvec8=f16mat2x4 -Dafpmat4=f16mat4

    -Dimfmtc1=r32f -Dimfmtc4=rgba16f
    -Dunfp=mediump

    # pack1 image access: load converts the fetched .r channel to float16_t
    "-D image1d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)"
    "-D image2d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)"
    "-D image3d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)"
    "-D image1d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
    "-D image2d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
    "-D image3d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
    "-D image1d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
    "-D image2d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
    "-D image3d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"

    # pack4 image access: one texel per element
    "-D image1d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))"
    "-D image2d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))"
    "-D image3d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))"
    "-D image1d_st4(img,p,v)={imageStore(img,p,v);}"
    "-D image2d_st4(img,p,v)={imageStore(img,p,v);}"
    "-D image3d_st4(img,p,v)={imageStore(img,p,v);}"
    "-D image1d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
    "-D image2d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
    "-D image3d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"

    # pack8 image access: two adjacent texels along x (x*2 and x*2+1) per element
    "-D image1d_ld8(tex,p)=f16mat2x4(texelFetch(tex,p*2,0),texelFetch(tex,p*2+1,0))"
    "-D image2d_ld8(tex,p)=f16mat2x4(texelFetch(tex,ivec2(p.x*2,p.y),0),texelFetch(tex,ivec2(p.x*2+1,p.y),0))"
    "-D image3d_ld8(tex,p)=f16mat2x4(texelFetch(tex,ivec3(p.x*2,p.y,p.z),0),texelFetch(tex,ivec3(p.x*2+1,p.y,p.z),0))"
    "-D image1d_st8(img,p,v)={imageStore(img,p*2,v[0]);imageStore(img,p*2+1,v[1]);}"
    "-D image2d_st8(img,p,v)={imageStore(img,ivec2(p.x*2,p.y),v[0]);imageStore(img,ivec2(p.x*2+1,p.y),v[1]);}"
    "-D image3d_st8(img,p,v)={imageStore(img,ivec3(p.x*2,p.y,p.z),v[0]);imageStore(img,ivec3(p.x*2+1,p.y,p.z),v[1]);}"
    "-D image1d_cp8(img,p,tex,sp)={imageStore(img,p*2,texelFetch(tex,sp*2,0));imageStore(img,p*2+1,texelFetch(tex,sp*2+1,0));}"
    "-D image2d_cp8(img,p,tex,sp)={imageStore(img,ivec2(p.x*2,p.y),texelFetch(tex,ivec2(sp.x*2,sp.y),0));imageStore(img,ivec2(p.x*2+1,p.y),texelFetch(tex,ivec2(sp.x*2+1,sp.y),0));}"
    "-D image3d_cp8(img,p,tex,sp)={imageStore(img,ivec3(p.x*2,p.y,p.z),texelFetch(tex,ivec3(sp.x*2,sp.y,sp.z),0));imageStore(img,ivec3(p.x*2+1,p.y,p.z),texelFetch(tex,ivec3(sp.x*2+1,sp.y,sp.z),0));}"

    # buffer access: halves are packed two-per-uint in storage
    "-D buffer_ld1(buf,i)=float16_t(buf[i])"
    "-D buffer_st1(buf,i,v)={buf[i]=float(v);}"
    "-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
    "-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=uvec2(packHalf2x16(vec2(f16vec2(sbuf[si4.r],sbuf[si4.g]))),packHalf2x16(vec2(f16vec2(sbuf[si4.b],sbuf[si4.a]))));}"
    "-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=uvec4(packHalf2x16(vec2(f16vec2(sbuf[si4.r],sbuf[si4.g]))),packHalf2x16(vec2(f16vec2(sbuf[si4.b],sbuf[si4.a]))),packHalf2x16(vec2(f16vec2(sbuf[sii4.r],sbuf[sii4.g]))),packHalf2x16(vec2(f16vec2(sbuf[sii4.b],sbuf[sii4.a]))));}"
    "-D buffer_ld2(buf,i)=f16vec2(unpackHalf2x16(buf[i]))"
    # The assignment must be terminated with ';' inside the braces, like every
    # other *_st / *_cp macro here; without it the expansion is not valid GLSL.
    "-D buffer_st2(buf,i,v)={buf[i]=packHalf2x16(vec2(v));}"
    "-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
    "-D buffer_ld4(buf,i)=f16vec4(vec4(unpackHalf2x16(buf[i].x),unpackHalf2x16(buf[i].y)))"
    "-D buffer_st4(buf,i,v)={buf[i]=uvec2(packHalf2x16(vec2(v.rg)),packHalf2x16(vec2(v.ba)));}"
    "-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
    "-D buffer_cp4to1(buf,i4,sbuf,si)={uvec2 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.x);vec2 _v1=unpackHalf2x16(_v.y); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g;}"
    "-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=uvec4(sbuf[si2.r],sbuf[si2.g]);}"
    "-D buffer_ld8(buf,i)=f16mat2x4(f16vec4(vec4(unpackHalf2x16(buf[i].r),unpackHalf2x16(buf[i].g))),f16vec4(vec4(unpackHalf2x16(buf[i].b),unpackHalf2x16(buf[i].a))))"
    "-D buffer_st8(buf,i,v)={buf[i]=uvec4(uvec2(packHalf2x16(vec2(v[0].rg)),packHalf2x16(vec2(v[0].ba))),uvec2(packHalf2x16(vec2(v[1].rg)),packHalf2x16(vec2(v[1].ba))));}"
    "-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
    "-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={uvec4 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.r);vec2 _v1=unpackHalf2x16(_v.g);vec2 _v2=unpackHalf2x16(_v.b);vec2 _v3=unpackHalf2x16(_v.a); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g; buf[ii4.r]=_v2.r;buf[ii4.g]=_v2.g;buf[ii4.b]=_v3.r;buf[ii4.a]=_v3.g;}"
    "-D buffer_cp8to4(buf,i2,sbuf,si)={uvec4 _v=sbuf[si]; buf[i2.r]=_v.rg;buf[i2.g]=_v.ba;}"

    # psc(x): use x unless it is 0, in which case fall back to p.x
    "-D psc(x)=(x==0?p.x:x)"
    -DNCNN_image_shader=1 -DNCNN_fp16_packed=1 -DNCNN_fp16_arithmetic=1
    # -V: emit SPIR-V with Vulkan semantics, -s: silent, -x: write the binary as text hex
    -V -s -x -o ${SHADER_image_fp16pa_SPV_HEX_FILE} ${SHADER_SRC}
    DEPENDS ${SHADER_SRC}
    COMMENT "Building SPIR-V module ${SHADER_image_fp16pa_SRC_NAME_WE}.spv"
    VERBATIM
)
# The header only exists after the rule above has run.
set_source_files_properties(${SHADER_image_fp16pa_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)

# image + fp16s
set(SHADER_image_fp16s_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16s")

@@ -401,12 +472,12 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER
)
set_source_files_properties(${SHADER_image_fp16s_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)

# image + fp16a
set(SHADER_image_fp16a_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16a")
# image + fp16s + fp16a
set(SHADER_image_fp16sa_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16sa")

set(SHADER_image_fp16a_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16a_SRC_NAME_WE}.spv.hex.h)
set(SHADER_image_fp16sa_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16sa_SRC_NAME_WE}.spv.hex.h)
add_custom_command(
OUTPUT ${SHADER_image_fp16a_SPV_HEX_FILE}
OUTPUT ${SHADER_image_fp16sa_SPV_HEX_FILE}
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
ARGS -Dsfp=float16_t -Dsfpvec2=f16vec2 -Dsfpvec4=f16vec4 -Dsfpvec8=f16mat2x4 -Dsfpmat4=f16mat4
-Dafp=float16_t -Dafpvec2=f16vec2 -Dafpvec4=f16vec4 -Dafpvec8=f16mat2x4 -Dafpmat4=f16mat4
@@ -467,12 +538,12 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER

"-D psc(x)=(x==0?p.x:x)"
-DNCNN_image_shader=1 -DNCNN_fp16_storage=1 -DNCNN_fp16_arithmetic=1
-V -s -x -o ${SHADER_image_fp16a_SPV_HEX_FILE} ${SHADER_SRC}
-V -s -x -o ${SHADER_image_fp16sa_SPV_HEX_FILE} ${SHADER_SRC}
DEPENDS ${SHADER_SRC}
COMMENT "Building SPIR-V module ${SHADER_image_fp16a_SRC_NAME_WE}.spv"
COMMENT "Building SPIR-V module ${SHADER_image_fp16sa_SRC_NAME_WE}.spv"
VERBATIM
)
set_source_files_properties(${SHADER_image_fp16a_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
set_source_files_properties(${SHADER_image_fp16sa_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)

set(LOCAL_SHADER_SPV_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.spv.h)

@@ -484,8 +555,9 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER
"static const uint32_t ${SHADER_fp16sa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16sa_SRC_NAME_WE}.spv.hex.h\"\n};\n"
"static const uint32_t ${SHADER_image_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_SRC_NAME_WE}.spv.hex.h\"\n};\n"
"static const uint32_t ${SHADER_image_fp16p_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16p_SRC_NAME_WE}.spv.hex.h\"\n};\n"
"static const uint32_t ${SHADER_image_fp16pa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16pa_SRC_NAME_WE}.spv.hex.h\"\n};\n"
"static const uint32_t ${SHADER_image_fp16s_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16s_SRC_NAME_WE}.spv.hex.h\"\n};\n"
"static const uint32_t ${SHADER_image_fp16a_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16a_SRC_NAME_WE}.spv.hex.h\"\n};\n"
"static const uint32_t ${SHADER_image_fp16sa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16sa_SRC_NAME_WE}.spv.hex.h\"\n};\n"
)

set_source_files_properties(${LOCAL_SHADER_SPV_HEADER} PROPERTIES GENERATED TRUE)
@@ -498,8 +570,9 @@ function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADER
${SHADER_fp16sa_SPV_HEX_FILE}
${SHADER_image_SPV_HEX_FILE}
${SHADER_image_fp16p_SPV_HEX_FILE}
${SHADER_image_fp16pa_SPV_HEX_FILE}
${SHADER_image_fp16s_SPV_HEX_FILE}
${SHADER_image_fp16a_SPV_HEX_FILE}
${SHADER_image_fp16sa_SPV_HEX_FILE}
)

set(${SHADER_SPV_HEADER} ${LOCAL_SHADER_SPV_HEADER} PARENT_SCOPE)


+ 5
- 2
src/CMakeLists.txt View File

@@ -62,8 +62,9 @@ macro(ncnn_add_shader SHADER_SRC)
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_fp16sa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_fp16sa_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16p_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16p_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16pa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16pa_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16s_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16s_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16a_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16a_spv_data)},\n")
string(APPEND layer_shader_registry "{${SHADER_SRC_NAME_WE}_image_fp16sa_spv_data,sizeof(${SHADER_SRC_NAME_WE}_image_fp16sa_spv_data)},\n")

list(APPEND SHADER_SPV_HEX_FILES ${SHADER_SPV_HEADER})
list(APPEND SHADER_SPV_HEX_FILES ${SHADER_SPV_HEX_HEADERS})
@@ -83,9 +84,11 @@ macro(ncnn_add_shader SHADER_SRC)
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16p = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16pa = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16s = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16a = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
set(layer_shader_type_enum "${layer_shader_type_enum}${SHADER_SRC_NAME_WE}_image_fp16sa = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
endmacro()



+ 17
- 10
src/gpu.cpp View File

@@ -1613,54 +1613,61 @@ int VulkanDevice::create_shader_module()
// 4 = fp16sa
// 5 = image
// 6 = image_fp16p
// 7 = image_fp16s
// 8 = image_fp16a
// 7 = image_fp16pa
// 8 = image_fp16s
// 9 = image_fp16sa

if (!info.support_fp16_packed)
{
if (i % 9 == 1)
if (i % 10 == 1)
continue;
}

if (!info.support_fp16_packed || !info.support_fp16_arithmetic)
{
if (i % 9 == 2)
if (i % 10 == 2)
continue;
}

if (!info.support_fp16_storage)
{
if (i % 9 == 3)
if (i % 10 == 3)
continue;
}

if (!info.support_fp16_storage || !info.support_fp16_arithmetic)
{
if (i % 9 == 4)
if (i % 10 == 4)
continue;
}

// if (!info.support_image_storage)
// {
// if (i % 9 == 5)
// if (i % 10 == 5)
// continue;
// }

if (!info.support_fp16_packed)
{
if (i % 9 == 6)
if (i % 10 == 6)
continue;
}

if (!info.support_fp16_packed || !info.support_fp16_arithmetic)
{
if (i % 10 == 7)
continue;
}

if (!info.support_fp16_storage)
{
if (i % 9 == 7)
if (i % 10 == 8)
continue;
}

if (!info.support_fp16_storage || !info.support_fp16_arithmetic)
{
if (i % 9 == 8)
if (i % 10 == 9)
continue;
}



+ 1
- 0
src/layer/vulkan/shader/crop_pack4to8.comp View File

@@ -97,6 +97,7 @@ void main()
afpvec4 v7 = image3d_ld4(bottom_blob, ivec3(x, y, zz4.a / 4));

afpvec8 v;
// TODO bugihfa
v[0].r = v0[z4.r % 4];
v[0].g = v1[z4.g % 4];
v[0].b = v2[z4.b % 4];


+ 1
- 0
src/layer/vulkan/shader/crop_pack8to4.comp View File

@@ -92,6 +92,7 @@ void main()
afpvec8 v3 = image3d_ld8(bottom_blob, ivec3(x, y, z4.a / 8));

afpvec4 v;
// TODO bugihfa
v.r = v0[(z4.r % 8) / 4][z4.r % 4];
v.g = v1[(z4.g % 8) / 4][z4.g % 4];
v.b = v2[(z4.b % 8) / 4][z4.b % 4];


+ 0
- 2
src/layer/vulkan/shader/packing_pack1to8.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack1to8_fp16_to_fp32.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack1to8_fp32_to_fp16.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack8.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack8_fp16_to_fp32.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack8_fp32_to_fp16.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack8to1.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack8to1_fp16_to_fp32.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack8to1_fp32_to_fp16.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack8to4.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack8to4_fp16_to_fp32.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 0
- 2
src/layer/vulkan/shader/packing_pack8to4_fp32_to_fp16.comp View File

@@ -16,10 +16,8 @@

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#if !NCNN_fp16_arithmetic
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif


+ 1
- 0
src/layer/vulkan/shader/shufflechannel_pack4.comp View File

@@ -98,6 +98,7 @@ void main()

ivec4 lane4 = z4 % 4;

// TODO bugihfa
afpvec4 v = afpvec4(vr[lane4.r], vg[lane4.g], vb[lane4.b], va[lane4.a]);

#if NCNN_image_shader


+ 1
- 0
src/layer/vulkan/shader/shufflechannel_pack8.comp View File

@@ -117,6 +117,7 @@ void main()
ivec4 lane4 = z4 % 4;
ivec4 lane8 = zz4 % 4;

// TODO bugihfa
afpvec8 v = afpvec8(vr[sz4.r][lane4.r], vg[sz4.g][lane4.g], vb[sz4.b][lane4.b], va[sz4.a][lane4.a], vvr[szz4.r][lane8.r], vvg[szz4.g][lane8.g], vvb[szz4.b][lane8.b], vva[szz4.a][lane8.a]);

#if NCNN_image_shader


+ 22
- 12
src/pipeline.cpp View File

@@ -89,18 +89,23 @@ int Pipeline::create(int shader_type_index, const Option& opt, const std::vector
// 4 = fp16sa
// 5 = image
// 6 = image_fp16p
// 7 = image_fp16s
// 8 = image_fp16a
// 7 = image_fp16pa
// 8 = image_fp16s
// 9 = image_fp16sa

if (opt.use_image_storage && opt.use_fp16_storage && opt.use_fp16_arithmetic)
if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic)
{
shader_type_index += 8;
shader_type_index += 9;
}
else if (opt.use_image_storage && opt.use_fp16_storage)
else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic)
{
shader_type_index += 7;
}
else if (opt.use_image_storage && opt.use_fp16_packed)
else if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage)
{
shader_type_index += 8;
}
else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed)
{
shader_type_index += 6;
}
@@ -595,18 +600,23 @@ int ImportAndroidHardwareBufferPipeline::create(VkAndroidHardwareBufferImageAllo
// 4 = fp16sa
// 5 = image
// 6 = image_fp16p
// 7 = image_fp16s
// 8 = image_fp16a
// 7 = image_fp16pa
// 8 = image_fp16s
// 9 = image_fp16sa

if (opt.use_image_storage && opt.use_fp16_storage && opt.use_fp16_arithmetic)
if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic)
{
shader_type_index += 8;
shader_type_index += 9;
}
else if (opt.use_image_storage && opt.use_fp16_storage)
else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed && vkdev->info.support_fp16_arithmetic && opt.use_fp16_arithmetic)
{
shader_type_index += 7;
}
else if (opt.use_image_storage && opt.use_fp16_packed)
else if (opt.use_image_storage && vkdev->info.support_fp16_storage && opt.use_fp16_storage)
{
shader_type_index += 8;
}
else if (opt.use_image_storage && vkdev->info.support_fp16_packed && opt.use_fp16_packed)
{
shader_type_index += 6;
}


Loading…
Cancel
Save