From a3a2548aa28a9ff7924f76d5085fabe94de79ddb Mon Sep 17 00:00:00 2001
From: nihuini
Date: Thu, 21 Mar 2019 14:05:28 +0800
Subject: [PATCH] initial fp16s fp16a shader build system

---
# Reviewer notes. They sit between "---" and the diffstat, i.e. outside the
# commit message and outside every diff hunk.
#
# src/CMakeLists.txt, inside macro(ncnn_add_layer), once per SHADER_SRC:
#   * removes the get_filename_component(... NAME) call that set
#     SHADER_SRC_NAME;
#   * adds a "<name>_fp16s" rule: GLSLANGVALIDATOR_EXECUTABLE is run on the
#     same SHADER_SRC with -DNCNN_fp16_storage=1 and writes
#     <name>_fp16s.spv.hex.h into CMAKE_CURRENT_BINARY_DIR;
#   * adds a "<name>_fp16a" rule: as above, but with -DNCNN_fp16_storage=1
#     -DNCNN_fp16_arithmetic=1, writing <name>_fp16a.spv.hex.h;
#   * each variant passes its own name to -e together with
#     --source-entrypoint main, and is appended to layer_shader_spv_data,
#     layer_shader_registry and SHADER_SPV_HEX_FILES next to the existing
#     <name> entry.
#
# src/layer/shader/absval.comp and absval_pack4.comp:
#   * sfp / sfpvec4 / sfpmat4 expand to float16_t / f16vec4 / f16mat4 when
#     NCNN_fp16_storage is non-zero and to float / vec4 / mat4 otherwise;
#     afp / afpvec4 / afpmat4 do the same under NCNN_fp16_arithmetic.
#     Both fp16 branches require GL_AMD_gpu_shader_half_float.
#   * the buffer element type becomes sfp (sfpvec4 in the pack4 shader), the
#     local v becomes afp (afpvec4), and abs(v) is wrapped in sfp() /
#     sfpvec4() before it is stored back.
#
# NOTE(review): the two extra rules are emitted for every SHADER_SRC in the
# loop, while this patch only converts the absval shaders. Shaders that never
# test the NCNN_fp16_* macros would presumably be compiled three times with
# identical output - confirm this is intended for an "initial" step.
# NOTE(review): line breaks and indentation in this copy were restored from
# the hunk headers' line counts; compare against commit a3a2548 before
# applying.
 src/CMakeLists.txt                 | 39 +++++++++++++++++++++++++++++-
 src/layer/shader/absval.comp       | 28 ++++++++++++++++++++++++---
 src/layer/shader/absval_pack4.comp | 28 ++++++++++++++++++++++++---
 3 files changed, 88 insertions(+), 7 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c883ff562..bfb222b4d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -87,7 +87,6 @@ macro(ncnn_add_layer class)
         file(GLOB_RECURSE SHADER_SUBSRCS "layer/shader/${name}_*.comp")
         list(APPEND SHADER_SRCS ${SHADER_SUBSRCS})
         foreach(SHADER_SRC ${SHADER_SRCS})
-            get_filename_component(SHADER_SRC_NAME ${SHADER_SRC} NAME)
             get_filename_component(SHADER_SRC_NAME_WE ${SHADER_SRC} NAME_WE)
 
             set(SHADER_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.spv.hex.h)
@@ -105,6 +104,44 @@ macro(ncnn_add_layer class)
             string(APPEND layer_shader_registry "{\"${SHADER_SRC_NAME_WE}\",${SHADER_SRC_NAME_WE}_spv_data,sizeof(${SHADER_SRC_NAME_WE}_spv_data)},\n")
 
             list(APPEND SHADER_SPV_HEX_FILES ${SHADER_SPV_HEX_FILE})
+
+            # fp16 storage
+            set(SHADER_fp16s_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_fp16s")
+
+            set(SHADER_fp16s_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_fp16s_SRC_NAME_WE}.spv.hex.h)
+            add_custom_command(
+                OUTPUT ${SHADER_fp16s_SPV_HEX_FILE}
+                COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
+                ARGS -DNCNN_fp16_storage=1 -V -s -e ${SHADER_fp16s_SRC_NAME_WE} --source-entrypoint main -x -o ${SHADER_fp16s_SPV_HEX_FILE} ${SHADER_SRC}
+                DEPENDS ${SHADER_SRC}
+                COMMENT "Building SPIR-V module ${SHADER_fp16s_SRC_NAME_WE}.spv"
+                VERBATIM
+            )
+            set_source_files_properties(${SHADER_fp16s_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
+
+            string(APPEND layer_shader_spv_data "static const uint32_t ${SHADER_fp16s_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16s_SRC_NAME_WE}.spv.hex.h\"\n};\n")
+            string(APPEND layer_shader_registry "{\"${SHADER_fp16s_SRC_NAME_WE}\",${SHADER_fp16s_SRC_NAME_WE}_spv_data,sizeof(${SHADER_fp16s_SRC_NAME_WE}_spv_data)},\n")
+
+            list(APPEND SHADER_SPV_HEX_FILES ${SHADER_fp16s_SPV_HEX_FILE})
+
+            # fp16 storage + fp16 arithmetic
+            set(SHADER_fp16a_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_fp16a")
+
+            set(SHADER_fp16a_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_fp16a_SRC_NAME_WE}.spv.hex.h)
+            add_custom_command(
+                OUTPUT ${SHADER_fp16a_SPV_HEX_FILE}
+                COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
+                ARGS -DNCNN_fp16_storage=1 -DNCNN_fp16_arithmetic=1 -V -s -e ${SHADER_fp16a_SRC_NAME_WE} --source-entrypoint main -x -o ${SHADER_fp16a_SPV_HEX_FILE} ${SHADER_SRC}
+                DEPENDS ${SHADER_SRC}
+                COMMENT "Building SPIR-V module ${SHADER_fp16a_SRC_NAME_WE}.spv"
+                VERBATIM
+            )
+            set_source_files_properties(${SHADER_fp16a_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
+
+            string(APPEND layer_shader_spv_data "static const uint32_t ${SHADER_fp16a_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16a_SRC_NAME_WE}.spv.hex.h\"\n};\n")
+            string(APPEND layer_shader_registry "{\"${SHADER_fp16a_SRC_NAME_WE}\",${SHADER_fp16a_SRC_NAME_WE}_spv_data,sizeof(${SHADER_fp16a_SRC_NAME_WE}_spv_data)},\n")
+
+            list(APPEND SHADER_SPV_HEX_FILES ${SHADER_fp16a_SPV_HEX_FILE})
         endforeach()
     endif()
 
diff --git a/src/layer/shader/absval.comp b/src/layer/shader/absval.comp
index 530e30381..79b7f3036 100644
--- a/src/layer/shader/absval.comp
+++ b/src/layer/shader/absval.comp
@@ -14,11 +14,33 @@
 
 #version 450
 
+#if NCNN_fp16_storage
+#extension GL_AMD_gpu_shader_half_float: require
+#define sfp float16_t
+#define sfpvec4 f16vec4
+#define sfpmat4 f16mat4
+#else
+#define sfp float
+#define sfpvec4 vec4
+#define sfpmat4 mat4
+#endif
+
+#if NCNN_fp16_arithmetic
+#extension GL_AMD_gpu_shader_half_float: require
+#define afp float16_t
+#define afpvec4 f16vec4
+#define afpmat4 f16mat4
+#else
+#define afp float
+#define afpvec4 vec4
+#define afpmat4 mat4
+#endif
+
 layout (local_size_x_id = 233) in;
 layout (local_size_y_id = 234) in;
 layout (local_size_z_id = 235) in;
 
-layout (binding = 0) buffer bottom_top_blob { float bottom_top_blob_data[]; };
+layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
 
 layout (push_constant) uniform parameter
 {
@@ -40,7 +62,7 @@ void main()
 
     const int gi = gz * p.cstep + gy * p.w + gx;
 
-    float v = bottom_top_blob_data[gi];
+    afp v = bottom_top_blob_data[gi];
 
-    bottom_top_blob_data[gi] = abs(v);
+    bottom_top_blob_data[gi] = sfp(abs(v));
 }
diff --git a/src/layer/shader/absval_pack4.comp b/src/layer/shader/absval_pack4.comp
index c0f2e2bd2..65ff690fa 100644
--- a/src/layer/shader/absval_pack4.comp
+++ b/src/layer/shader/absval_pack4.comp
@@ -14,11 +14,33 @@
 
 #version 450
 
+#if NCNN_fp16_storage
+#extension GL_AMD_gpu_shader_half_float: require
+#define sfp float16_t
+#define sfpvec4 f16vec4
+#define sfpmat4 f16mat4
+#else
+#define sfp float
+#define sfpvec4 vec4
+#define sfpmat4 mat4
+#endif
+
+#if NCNN_fp16_arithmetic
+#extension GL_AMD_gpu_shader_half_float: require
+#define afp float16_t
+#define afpvec4 f16vec4
+#define afpmat4 f16mat4
+#else
+#define afp float
+#define afpvec4 vec4
+#define afpmat4 mat4
+#endif
+
 layout (local_size_x_id = 233) in;
 layout (local_size_y_id = 234) in;
 layout (local_size_z_id = 235) in;
 
-layout (binding = 0) buffer bottom_top_blob { vec4 bottom_top_blob_data[]; };
+layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };
 
 layout (push_constant) uniform parameter
 {
@@ -40,7 +62,7 @@ void main()
 
     const int gi = gz * p.cstep + gy * p.w + gx;
 
-    vec4 v = bottom_top_blob_data[gi];
+    afpvec4 v = bottom_top_blob_data[gi];
 
-    bottom_top_blob_data[gi] = abs(v);
+    bottom_top_blob_data[gi] = sfpvec4(abs(v));
 }