Browse Source

initial fp16s fp16a shader build system

tags/20190611
nihuini 7 years ago
parent
commit
a3a2548aa2
3 changed files with 88 additions and 7 deletions
  1. +38
    -1
      src/CMakeLists.txt
  2. +25
    -3
      src/layer/shader/absval.comp
  3. +25
    -3
      src/layer/shader/absval_pack4.comp

+ 38
- 1
src/CMakeLists.txt View File

@@ -87,7 +87,6 @@ macro(ncnn_add_layer class)
file(GLOB_RECURSE SHADER_SUBSRCS "layer/shader/${name}_*.comp")
list(APPEND SHADER_SRCS ${SHADER_SUBSRCS})
foreach(SHADER_SRC ${SHADER_SRCS})
get_filename_component(SHADER_SRC_NAME ${SHADER_SRC} NAME)
get_filename_component(SHADER_SRC_NAME_WE ${SHADER_SRC} NAME_WE)

set(SHADER_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.spv.hex.h)
@@ -105,6 +104,44 @@ macro(ncnn_add_layer class)
string(APPEND layer_shader_registry "{\"${SHADER_SRC_NAME_WE}\",${SHADER_SRC_NAME_WE}_spv_data,sizeof(${SHADER_SRC_NAME_WE}_spv_data)},\n")

list(APPEND SHADER_SPV_HEX_FILES ${SHADER_SPV_HEX_FILE})

# fp16 storage
# Build an additional SPIR-V variant of the same shader source with the
# NCNN_fp16_storage preprocessor macro defined. The variant is named
# "<shader>_fp16s" for its entry point, generated file, and registry entry.
set(SHADER_fp16s_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_fp16s")

# Generated hex dump lives in the current binary directory.
set(SHADER_fp16s_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_fp16s_SRC_NAME_WE}.spv.hex.h)
# glslangValidator options used below:
#   -DNCNN_fp16_storage=1      define the macro tested by the shader's #if
#   -V                         create SPIR-V under Vulkan semantics
#   -s                         silent mode
#   -e <name>                  entry-point name to emit
#   --source-entrypoint main   rename the source's main() to the -e name
#   -x                         save output as text-based hexadecimal numbers
#   -o <file>                  output file
add_custom_command(
OUTPUT ${SHADER_fp16s_SPV_HEX_FILE}
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
ARGS -DNCNN_fp16_storage=1 -V -s -e ${SHADER_fp16s_SRC_NAME_WE} --source-entrypoint main -x -o ${SHADER_fp16s_SPV_HEX_FILE} ${SHADER_SRC}
DEPENDS ${SHADER_SRC}
COMMENT "Building SPIR-V module ${SHADER_fp16s_SRC_NAME_WE}.spv"
VERBATIM
)
# The file does not exist at configure time; mark it as produced by the build.
set_source_files_properties(${SHADER_fp16s_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)

# Append the C array definition (it #includes the generated hex file) and the
# matching {name, data, size} registry entry to the accumulated text.
string(APPEND layer_shader_spv_data "static const uint32_t ${SHADER_fp16s_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16s_SRC_NAME_WE}.spv.hex.h\"\n};\n")
string(APPEND layer_shader_registry "{\"${SHADER_fp16s_SRC_NAME_WE}\",${SHADER_fp16s_SRC_NAME_WE}_spv_data,sizeof(${SHADER_fp16s_SRC_NAME_WE}_spv_data)},\n")

# Record the generated file in the list of SPIR-V hex outputs.
list(APPEND SHADER_SPV_HEX_FILES ${SHADER_fp16s_SPV_HEX_FILE})

# fp16 storage + fp16 arithmetic
# Build an additional SPIR-V variant of the same shader source with both the
# NCNN_fp16_storage and NCNN_fp16_arithmetic preprocessor macros defined. The
# variant is named "<shader>_fp16a" for its entry point, generated file, and
# registry entry.
set(SHADER_fp16a_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_fp16a")

# Generated hex dump lives in the current binary directory.
set(SHADER_fp16a_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_fp16a_SRC_NAME_WE}.spv.hex.h)
# glslangValidator options used below:
#   -DNCNN_fp16_storage=1      define the storage macro tested by the shader
#   -DNCNN_fp16_arithmetic=1   define the arithmetic macro tested by the shader
#   -V                         create SPIR-V under Vulkan semantics
#   -s                         silent mode
#   -e <name>                  entry-point name to emit
#   --source-entrypoint main   rename the source's main() to the -e name
#   -x                         save output as text-based hexadecimal numbers
#   -o <file>                  output file
add_custom_command(
OUTPUT ${SHADER_fp16a_SPV_HEX_FILE}
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
ARGS -DNCNN_fp16_storage=1 -DNCNN_fp16_arithmetic=1 -V -s -e ${SHADER_fp16a_SRC_NAME_WE} --source-entrypoint main -x -o ${SHADER_fp16a_SPV_HEX_FILE} ${SHADER_SRC}
DEPENDS ${SHADER_SRC}
COMMENT "Building SPIR-V module ${SHADER_fp16a_SRC_NAME_WE}.spv"
VERBATIM
)
# The file does not exist at configure time; mark it as produced by the build.
set_source_files_properties(${SHADER_fp16a_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)

# Append the C array definition (it #includes the generated hex file) and the
# matching {name, data, size} registry entry to the accumulated text.
string(APPEND layer_shader_spv_data "static const uint32_t ${SHADER_fp16a_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16a_SRC_NAME_WE}.spv.hex.h\"\n};\n")
string(APPEND layer_shader_registry "{\"${SHADER_fp16a_SRC_NAME_WE}\",${SHADER_fp16a_SRC_NAME_WE}_spv_data,sizeof(${SHADER_fp16a_SRC_NAME_WE}_spv_data)},\n")

# Record the generated file in the list of SPIR-V hex outputs.
list(APPEND SHADER_SPV_HEX_FILES ${SHADER_fp16a_SPV_HEX_FILE})
endforeach()
endif()



+ 25
- 3
src/layer/shader/absval.comp View File

@@ -14,11 +14,33 @@

#version 450

#if NCNN_fp16_storage
#extension GL_AMD_gpu_shader_half_float: require
#define sfp float16_t
#define sfpvec4 f16vec4
#define sfpmat4 f16mat4
#else
#define sfp float
#define sfpvec4 vec4
#define sfpmat4 mat4
#endif

#if NCNN_fp16_arithmetic
#extension GL_AMD_gpu_shader_half_float: require
#define afp float16_t
#define afpvec4 f16vec4
#define afpmat4 f16mat4
#else
#define afp float
#define afpvec4 vec4
#define afpmat4 mat4
#endif

layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

layout (binding = 0) buffer bottom_top_blob { float bottom_top_blob_data[]; };
layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
@@ -40,7 +62,7 @@ void main()

const int gi = gz * p.cstep + gy * p.w + gx;

float v = bottom_top_blob_data[gi];
afp v = bottom_top_blob_data[gi];

bottom_top_blob_data[gi] = abs(v);
bottom_top_blob_data[gi] = sfp(abs(v));
}

+ 25
- 3
src/layer/shader/absval_pack4.comp View File

@@ -14,11 +14,33 @@

#version 450

#if NCNN_fp16_storage
#extension GL_AMD_gpu_shader_half_float: require
#define sfp float16_t
#define sfpvec4 f16vec4
#define sfpmat4 f16mat4
#else
#define sfp float
#define sfpvec4 vec4
#define sfpmat4 mat4
#endif

#if NCNN_fp16_arithmetic
#extension GL_AMD_gpu_shader_half_float: require
#define afp float16_t
#define afpvec4 f16vec4
#define afpmat4 f16mat4
#else
#define afp float
#define afpvec4 vec4
#define afpmat4 mat4
#endif

layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

layout (binding = 0) buffer bottom_top_blob { vec4 bottom_top_blob_data[]; };
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
@@ -40,7 +62,7 @@ void main()

const int gi = gz * p.cstep + gy * p.w + gx;

vec4 v = bottom_top_blob_data[gi];
afpvec4 v = bottom_top_blob_data[gi];

bottom_top_blob_data[gi] = abs(v);
bottom_top_blob_data[gi] = sfpvec4(abs(v));
}

Loading…
Cancel
Save