diff --git a/cmake/ncnn_generate_shader_comp_header.cmake b/cmake/ncnn_generate_shader_comp_header.cmake index 33883f0bd..5d0fb05d5 100644 --- a/cmake/ncnn_generate_shader_comp_header.cmake +++ b/cmake/ncnn_generate_shader_comp_header.cmake @@ -5,7 +5,9 @@ file(READ ${SHADER_SRC} comp_data) # skip leading comment string(FIND "${comp_data}" "#version" version_start) -string(SUBSTRING "${comp_data}" ${version_start} -1 comp_data) +if(NOT ${version_start} EQUAL -1) + string(SUBSTRING "${comp_data}" ${version_start} -1 comp_data) +endif() # remove whitespace string(REGEX REPLACE "\n +" "\n" comp_data "${comp_data}") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 475575cb8..5cd3112f3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -157,6 +157,7 @@ ncnn_add_layer(DeconvolutionDepthWise3D) if(NCNN_VULKAN) ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp) + ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/layer/vulkan/shader/vulkan_activation.comp) endif() add_custom_target(ncnn-generate-spirv DEPENDS ${NCNN_SHADER_SPV_HEX_FILES}) diff --git a/src/gpu.cpp b/src/gpu.cpp index f043e787f..c5a764891 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -23,6 +23,8 @@ #include "glslang/SPIRV/GlslangToSpv.h" #include "glslang/glslang/Public/ShaderLang.h" +#include "vulkan_activation.comp.hex.h" + #include "command.h" #include "layer.h" #include "layer/vulkan/packing_vulkan.h" @@ -1725,7 +1727,7 @@ int VulkanDevicePrivate::create_dummy_buffer_image() dummy_buffer.create(1, 4u, dummy_allocator); dummy_image.create(1, 4u, dummy_allocator); #if __APPLE__ - if (vkdev->info.vendor_id() != 0x8086) + if (vkdev->info.type() == 0) dummy_image_readonly.create(1, 4u, dummy_allocator); #else dummy_image_readonly.create(1, 4u, dummy_allocator); @@ -1736,7 +1738,7 @@ int VulkanDevicePrivate::create_dummy_buffer_image() cmd.record_dummy(dummy_buffer); cmd.record_dummy(dummy_image); #if __APPLE__ - if (vkdev->info.vendor_id() != 0x8086) + if (vkdev->info.type() == 0) 
cmd.record_dummy_readonly(dummy_image_readonly); #else cmd.record_dummy_readonly(dummy_image_readonly); @@ -1752,7 +1754,7 @@ void VulkanDevicePrivate::destroy_dummy_buffer_image() dummy_buffer.release(); dummy_image.release(); #if __APPLE__ - if (vkdev->info.vendor_id() != 0x8086) + if (vkdev->info.type() == 0) dummy_image_readonly.release(); #else dummy_image_readonly.release(); @@ -2818,7 +2820,7 @@ VkImageMat VulkanDevice::get_dummy_image() const VkImageMat VulkanDevice::get_dummy_image_readonly() const { #if __APPLE__ - if (info.vendor_id() == 0x8086) + if (info.type() != 0) return d->dummy_image; #endif return d->dummy_image_readonly; @@ -3245,6 +3247,28 @@ static TBuiltInResource get_default_TBuiltInResource() return resource; } +class VulkanShaderIncluder : public glslang::TShader::Includer +{ +public: + virtual glslang::TShader::Includer::IncludeResult* includeLocal(const char* headerName, const char* /*includerName*/, size_t /*inclusionDepth*/) + { + if (strcmp(headerName, "vulkan_activation.comp") == 0) + { + const char* const headerData = vulkan_activation_comp_data; + const size_t headerLength = sizeof(vulkan_activation_comp_data); + glslang::TShader::Includer::IncludeResult* r = new glslang::TShader::Includer::IncludeResult(headerName, headerData, headerLength, 0); + return r; + } + + return 0; + } + + virtual void releaseInclude(glslang::TShader::Includer::IncludeResult* r) + { + delete r; + } +}; + int compile_spirv_module(const char* comp_string, const Option& opt, std::vector& spirv) { // -1 for omitting the tail '\0' @@ -3699,6 +3723,10 @@ int compile_spirv_module(const char* comp_data, int comp_data_size, const Option custom_defines.push_back(std::make_pair("NCNN_shader_local_memory", "1")); } +#if __APPLE__ + custom_defines.push_back(std::make_pair("NCNN_moltenvk", "1")); +#endif + std::string preamble; std::vector processes; @@ -3740,7 +3768,9 @@ int compile_spirv_module(const char* comp_data, int comp_data_size, const Option TBuiltInResource 
resources = get_default_TBuiltInResource(); - bool pr = s.parse(&resources, 100, false, EShMsgDefault); + VulkanShaderIncluder includer; + + bool pr = s.parse(&resources, 100, ENoProfile, false, false, EShMsgDefault, includer); if (!pr) { NCNN_LOGE("compile spir-v module failed"); diff --git a/src/layer/vulkan/shader/convolution.comp b/src/layer/vulkan/shader/convolution.comp index cb7f29210..6bd9f492a 100644 --- a/src/layer/vulkan/shader/convolution.comp +++ b/src/layer/vulkan/shader/convolution.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -195,77 +198,14 @@ void main() } #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - sum4 = max(sum4, afp(0.f)); - sum5 = max(sum5, afp(0.f)); - sum6 = max(sum6, afp(0.f)); - sum7 = max(sum7, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; - sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; - sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; - sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; - sum4 = sum4 < afp(0.f) ? sum4 * slope : sum4; - sum5 = sum5 < afp(0.f) ? sum5 * slope : sum5; - sum6 = sum6 < afp(0.f) ? sum6 * slope : sum6; - sum7 = sum7 < afp(0.f) ? 
sum7 * slope : sum7; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - sum4 = clamp(sum4, const_min, const_max); - sum5 = clamp(sum5, const_min, const_max); - sum6 = clamp(sum6, const_min, const_max); - sum7 = clamp(sum7, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); - sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); - sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); - sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); - sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); - sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); - sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7 = sum7 * 
clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); + sum4 = activation_afp(sum4, activation_type, activation_param_0, activation_param_1); + sum5 = activation_afp(sum5, activation_type, activation_param_0, activation_param_1); + sum6 = activation_afp(sum6, activation_type, activation_param_0, activation_param_1); + sum7 = activation_afp(sum7, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); diff --git a/src/layer/vulkan/shader/convolution_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_1x1s1d1.comp index 8b287de98..b05efa377 100644 --- a/src/layer/vulkan/shader/convolution_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_1x1s1d1.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -130,36 +133,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * 
tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } - + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum.r); diff --git a/src/layer/vulkan/shader/convolution_3x3s1d1_winograd23_transform_output.comp b/src/layer/vulkan/shader/convolution_3x3s1d1_winograd23_transform_output.comp index baf7eac73..17923dc51 100644 --- a/src/layer/vulkan/shader/convolution_3x3s1d1_winograd23_transform_output.comp +++ b/src/layer/vulkan/shader/convolution_3x3s1d1_winograd23_transform_output.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -149,53 +152,10 @@ void main() v11 = m11 - m12 + m13; } - if (activation_type == 1) - { - v00 = max(v00, afp(0.f)); - v10 = max(v10, afp(0.f)); - v01 = max(v01, afp(0.f)); - v11 = max(v11, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - v00 = v00 < afp(0.f) ? v00 * slope : v00; - v10 = v10 < afp(0.f) ? v10 * slope : v10; - v01 = v01 < afp(0.f) ? v01 * slope : v01; - v11 = v11 < afp(0.f) ? 
v11 * slope : v11; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - v00 = clamp(v00, const_min, const_max); - v10 = clamp(v10, const_min, const_max); - v01 = clamp(v01, const_min, const_max); - v11 = clamp(v11, const_min, const_max); - } - if (activation_type == 4) - { - v00 = afp(1.f) / (afp(1.f) + exp(-v00)); - v10 = afp(1.f) / (afp(1.f) + exp(-v10)); - v01 = afp(1.f) / (afp(1.f) + exp(-v01)); - v11 = afp(1.f) / (afp(1.f) + exp(-v11)); - } - if (activation_type == 5) - { - v00 = v00 * tanh(log(exp(v00) + afp(1.f))); - v01 = v01 * tanh(log(exp(v01) + afp(1.f))); - v10 = v10 * tanh(log(exp(v10) + afp(1.f))); - v11 = v11 * tanh(log(exp(v11) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - v00 = v00 * clamp(v00 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v01 = v01 * clamp(v01 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v10 = v10 * clamp(v10 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v11 = v11 * clamp(v11 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + v00 = activation_afp(v00, activation_type, activation_param_0, activation_param_1); + v10 = activation_afp(v10, activation_type, activation_param_0, activation_param_1); + v01 = activation_afp(v01, activation_type, activation_param_0, activation_param_1); + v11 = activation_afp(v11, activation_type, activation_param_0, activation_param_1); // store 2x2 int x = gx * 2; diff --git a/src/layer/vulkan/shader/convolution_3x3s1d1_winograd43_transform_output.comp b/src/layer/vulkan/shader/convolution_3x3s1d1_winograd43_transform_output.comp index 6797a0044..4da8e8f51 100644 --- a/src/layer/vulkan/shader/convolution_3x3s1d1_winograd43_transform_output.comp +++ b/src/layer/vulkan/shader/convolution_3x3s1d1_winograd43_transform_output.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif 
+#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -237,125 +240,22 @@ void main() v33 = bias_value + v33; } - if (activation_type == 1) - { - v00 = max(v00, afp(0.f)); - v01 = max(v01, afp(0.f)); - v02 = max(v02, afp(0.f)); - v03 = max(v03, afp(0.f)); - v10 = max(v10, afp(0.f)); - v11 = max(v11, afp(0.f)); - v12 = max(v12, afp(0.f)); - v13 = max(v13, afp(0.f)); - v20 = max(v20, afp(0.f)); - v21 = max(v21, afp(0.f)); - v22 = max(v22, afp(0.f)); - v23 = max(v23, afp(0.f)); - v30 = max(v30, afp(0.f)); - v31 = max(v31, afp(0.f)); - v32 = max(v32, afp(0.f)); - v33 = max(v33, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - v00 = v00 < afp(0.f) ? v00 * slope : v00; - v01 = v01 < afp(0.f) ? v01 * slope : v01; - v02 = v02 < afp(0.f) ? v02 * slope : v02; - v03 = v03 < afp(0.f) ? v03 * slope : v03; - v10 = v10 < afp(0.f) ? v10 * slope : v10; - v11 = v11 < afp(0.f) ? v11 * slope : v11; - v12 = v12 < afp(0.f) ? v12 * slope : v12; - v13 = v13 < afp(0.f) ? v13 * slope : v13; - v20 = v20 < afp(0.f) ? v20 * slope : v20; - v21 = v21 < afp(0.f) ? v21 * slope : v21; - v22 = v22 < afp(0.f) ? v22 * slope : v22; - v23 = v23 < afp(0.f) ? v23 * slope : v23; - v30 = v30 < afp(0.f) ? v30 * slope : v30; - v31 = v31 < afp(0.f) ? v31 * slope : v31; - v32 = v32 < afp(0.f) ? v32 * slope : v32; - v33 = v33 < afp(0.f) ? 
v33 * slope : v33; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - v00 = clamp(v00, const_min, const_max); - v01 = clamp(v01, const_min, const_max); - v02 = clamp(v02, const_min, const_max); - v03 = clamp(v03, const_min, const_max); - v10 = clamp(v10, const_min, const_max); - v11 = clamp(v11, const_min, const_max); - v12 = clamp(v12, const_min, const_max); - v13 = clamp(v13, const_min, const_max); - v20 = clamp(v20, const_min, const_max); - v21 = clamp(v21, const_min, const_max); - v22 = clamp(v22, const_min, const_max); - v23 = clamp(v23, const_min, const_max); - v30 = clamp(v30, const_min, const_max); - v31 = clamp(v31, const_min, const_max); - v32 = clamp(v32, const_min, const_max); - v33 = clamp(v33, const_min, const_max); - } - if (activation_type == 4) - { - v00 = afp(1.f) / (afp(1.f) + exp(-v00)); - v01 = afp(1.f) / (afp(1.f) + exp(-v01)); - v02 = afp(1.f) / (afp(1.f) + exp(-v02)); - v03 = afp(1.f) / (afp(1.f) + exp(-v03)); - v10 = afp(1.f) / (afp(1.f) + exp(-v10)); - v11 = afp(1.f) / (afp(1.f) + exp(-v11)); - v12 = afp(1.f) / (afp(1.f) + exp(-v12)); - v13 = afp(1.f) / (afp(1.f) + exp(-v13)); - v20 = afp(1.f) / (afp(1.f) + exp(-v20)); - v21 = afp(1.f) / (afp(1.f) + exp(-v21)); - v22 = afp(1.f) / (afp(1.f) + exp(-v22)); - v23 = afp(1.f) / (afp(1.f) + exp(-v23)); - v30 = afp(1.f) / (afp(1.f) + exp(-v30)); - v31 = afp(1.f) / (afp(1.f) + exp(-v31)); - v32 = afp(1.f) / (afp(1.f) + exp(-v32)); - v33 = afp(1.f) / (afp(1.f) + exp(-v33)); - } - if (activation_type == 5) - { - v00 = v00 * tanh(log(exp(v00) + afp(1.f))); - v01 = v01 * tanh(log(exp(v01) + afp(1.f))); - v02 = v02 * tanh(log(exp(v02) + afp(1.f))); - v03 = v03 * tanh(log(exp(v03) + afp(1.f))); - v10 = v10 * tanh(log(exp(v10) + afp(1.f))); - v11 = v11 * tanh(log(exp(v11) + afp(1.f))); - v12 = v12 * tanh(log(exp(v12) + afp(1.f))); - v13 = v13 * tanh(log(exp(v13) + afp(1.f))); - v20 = v20 * tanh(log(exp(v20) + afp(1.f))); 
- v21 = v21 * tanh(log(exp(v21) + afp(1.f))); - v22 = v22 * tanh(log(exp(v22) + afp(1.f))); - v23 = v23 * tanh(log(exp(v23) + afp(1.f))); - v30 = v30 * tanh(log(exp(v30) + afp(1.f))); - v31 = v31 * tanh(log(exp(v31) + afp(1.f))); - v32 = v32 * tanh(log(exp(v32) + afp(1.f))); - v33 = v33 * tanh(log(exp(v33) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - v00 = v00 * clamp(v00 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v01 = v01 * clamp(v01 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v02 = v02 * clamp(v02 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v03 = v03 * clamp(v03 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v10 = v10 * clamp(v10 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v11 = v11 * clamp(v11 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v12 = v12 * clamp(v12 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v13 = v13 * clamp(v13 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v20 = v20 * clamp(v20 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v21 = v21 * clamp(v21 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v22 = v22 * clamp(v22 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v23 = v23 * clamp(v23 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v30 = v30 * clamp(v30 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v31 = v31 * clamp(v31 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v32 = v32 * clamp(v32 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v33 = v33 * clamp(v33 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + v00 = activation_afp(v00, activation_type, activation_param_0, activation_param_1); + v01 = activation_afp(v01, activation_type, activation_param_0, activation_param_1); + v02 = activation_afp(v02, activation_type, activation_param_0, activation_param_1); + v03 = activation_afp(v03, activation_type, activation_param_0, activation_param_1); + v10 = activation_afp(v10, activation_type, activation_param_0, 
activation_param_1); + v11 = activation_afp(v11, activation_type, activation_param_0, activation_param_1); + v12 = activation_afp(v12, activation_type, activation_param_0, activation_param_1); + v13 = activation_afp(v13, activation_type, activation_param_0, activation_param_1); + v20 = activation_afp(v20, activation_type, activation_param_0, activation_param_1); + v21 = activation_afp(v21, activation_type, activation_param_0, activation_param_1); + v22 = activation_afp(v22, activation_type, activation_param_0, activation_param_1); + v23 = activation_afp(v23, activation_type, activation_param_0, activation_param_1); + v30 = activation_afp(v30, activation_type, activation_param_0, activation_param_1); + v31 = activation_afp(v31, activation_type, activation_param_0, activation_param_1); + v32 = activation_afp(v32, activation_type, activation_param_0, activation_param_1); + v33 = activation_afp(v33, activation_type, activation_param_0, activation_param_1); // store 4x4 int x = gx * 4; diff --git a/src/layer/vulkan/shader/convolution_gemm.comp b/src/layer/vulkan/shader/convolution_gemm.comp index 7db334f77..22400e3c8 100644 --- a/src/layer/vulkan/shader/convolution_gemm.comp +++ b/src/layer/vulkan/shader/convolution_gemm.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + #define LOCAL_MEMORY_UNROLL_INCH 8 layout (constant_id = 0) const int kernel_w = 1; @@ -280,53 +283,10 @@ void main() return; #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; - sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; - sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; - sum3 = sum3 < afp(0.f) ? 
sum3 * slope : sum3; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack1to4.comp b/src/layer/vulkan/shader/convolution_pack1to4.comp index 6db48b619..11382d198 100644 --- a/src/layer/vulkan/shader/convolution_pack1to4.comp +++ b/src/layer/vulkan/shader/convolution_pack1to4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension 
GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -195,77 +198,14 @@ void main() } #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - sum4 = max(sum4, afp(0.f)); - sum5 = max(sum5, afp(0.f)); - sum6 = max(sum6, afp(0.f)); - sum7 = max(sum7, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - sum4 = mix(sum4, sum4 * afp(slope), lessThan(sum4, afpvec4(0.f))); - sum5 = mix(sum5, sum5 * afp(slope), lessThan(sum5, afpvec4(0.f))); - sum6 = mix(sum6, sum6 * afp(slope), lessThan(sum6, afpvec4(0.f))); - sum7 = mix(sum7, sum7 * afp(slope), lessThan(sum7, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - sum4 = clamp(sum4, const_min, const_max); - sum5 = clamp(sum5, const_min, const_max); - sum6 = clamp(sum6, const_min, const_max); - sum7 = clamp(sum7, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); - sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); - sum6 = afp(1.f) / (afp(1.f) + 
exp(-sum6)); - sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); - sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); - sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); - sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7 = sum7 * clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); + sum4 = activation_afpvec4(sum4, activation_type, activation_param_0, activation_param_1); + sum5 = activation_afpvec4(sum5, activation_type, activation_param_0, activation_param_1); + sum6 = activation_afpvec4(sum6, activation_type, activation_param_0, activation_param_1); + sum7 = activation_afpvec4(sum7, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx2.x, 
gy2.x, gz2.x), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack1to4_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack1to4_1x1s1d1.comp index f9028f7e2..b07906c49 100644 --- a/src/layer/vulkan/shader/convolution_pack1to4_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack1to4_1x1s1d1.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + #define LOCAL_MEMORY_UNROLL_INCH 8 layout (constant_id = 0) const int bias_term = 0; @@ -237,53 +240,10 @@ void main() #endif #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = 
afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack1to4_gemm.comp b/src/layer/vulkan/shader/convolution_pack1to4_gemm.comp index e0cfacd42..f8e7f8761 100644 --- a/src/layer/vulkan/shader/convolution_pack1to4_gemm.comp +++ b/src/layer/vulkan/shader/convolution_pack1to4_gemm.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + #define LOCAL_MEMORY_UNROLL_INCH 8 layout (constant_id = 0) const int kernel_w = 1; @@ -280,53 +283,10 @@ void main() return; #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = 
afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack1to8.comp b/src/layer/vulkan/shader/convolution_pack1to8.comp index d67838a33..07408da56 100644 --- a/src/layer/vulkan/shader/convolution_pack1to8.comp +++ b/src/layer/vulkan/shader/convolution_pack1to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include 
"vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -214,125 +217,14 @@ void main() } #endif - if (activation_type == 1) - { - sum0[0] = max(sum0[0], afp(0.f)); - sum0[1] = max(sum0[1], afp(0.f)); - sum1[0] = max(sum1[0], afp(0.f)); - sum1[1] = max(sum1[1], afp(0.f)); - sum2[0] = max(sum2[0], afp(0.f)); - sum2[1] = max(sum2[1], afp(0.f)); - sum3[0] = max(sum3[0], afp(0.f)); - sum3[1] = max(sum3[1], afp(0.f)); - sum4[0] = max(sum4[0], afp(0.f)); - sum4[1] = max(sum4[1], afp(0.f)); - sum5[0] = max(sum5[0], afp(0.f)); - sum5[1] = max(sum5[1], afp(0.f)); - sum6[0] = max(sum6[0], afp(0.f)); - sum6[1] = max(sum6[1], afp(0.f)); - sum7[0] = max(sum7[0], afp(0.f)); - sum7[1] = max(sum7[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); - sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); - sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); - sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); - sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); - sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); - sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); - sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); - sum4[0] = mix(sum4[0], sum4[0] * afp(slope), lessThan(sum4[0], afpvec4(0.f))); - sum4[1] = mix(sum4[1], sum4[1] * afp(slope), lessThan(sum4[1], afpvec4(0.f))); - sum5[0] = mix(sum5[0], sum5[0] * afp(slope), lessThan(sum5[0], afpvec4(0.f))); - sum5[1] = mix(sum5[1], sum5[1] * afp(slope), lessThan(sum5[1], afpvec4(0.f))); - sum6[0] = mix(sum6[0], sum6[0] * afp(slope), lessThan(sum6[0], afpvec4(0.f))); - sum6[1] = mix(sum6[1], sum6[1] * 
afp(slope), lessThan(sum6[1], afpvec4(0.f))); - sum7[0] = mix(sum7[0], sum7[0] * afp(slope), lessThan(sum7[0], afpvec4(0.f))); - sum7[1] = mix(sum7[1], sum7[1] * afp(slope), lessThan(sum7[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0[0] = clamp(sum0[0], const_min, const_max); - sum0[1] = clamp(sum0[1], const_min, const_max); - sum1[0] = clamp(sum1[0], const_min, const_max); - sum1[1] = clamp(sum1[1], const_min, const_max); - sum2[0] = clamp(sum2[0], const_min, const_max); - sum2[1] = clamp(sum2[1], const_min, const_max); - sum3[0] = clamp(sum3[0], const_min, const_max); - sum3[1] = clamp(sum3[1], const_min, const_max); - sum4[0] = clamp(sum4[0], const_min, const_max); - sum4[1] = clamp(sum4[1], const_min, const_max); - sum5[0] = clamp(sum5[0], const_min, const_max); - sum5[1] = clamp(sum5[1], const_min, const_max); - sum6[0] = clamp(sum6[0], const_min, const_max); - sum6[1] = clamp(sum6[1], const_min, const_max); - sum7[0] = clamp(sum7[0], const_min, const_max); - sum7[1] = clamp(sum7[1], const_min, const_max); - } - if (activation_type == 4) - { - sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); - sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); - sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); - sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); - sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); - sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); - sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); - sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); - sum4[0] = afp(1.f) / (afp(1.f) + exp(-sum4[0])); - sum4[1] = afp(1.f) / (afp(1.f) + exp(-sum4[1])); - sum5[0] = afp(1.f) / (afp(1.f) + exp(-sum5[0])); - sum5[1] = afp(1.f) / (afp(1.f) + exp(-sum5[1])); - sum6[0] = afp(1.f) / (afp(1.f) + exp(-sum6[0])); - sum6[1] = afp(1.f) / (afp(1.f) + exp(-sum6[1])); - sum7[0] = afp(1.f) / (afp(1.f) + exp(-sum7[0])); - sum7[1] = afp(1.f) / (afp(1.f) + exp(-sum7[1])); - } - 
if (activation_type == 5) - { - sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); - sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); - sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); - sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); - sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); - sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); - sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); - sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); - sum4[0] = sum4[0] * tanh(log(exp(sum4[0]) + afp(1.f))); - sum4[1] = sum4[1] * tanh(log(exp(sum4[1]) + afp(1.f))); - sum5[0] = sum5[0] * tanh(log(exp(sum5[0]) + afp(1.f))); - sum5[1] = sum5[1] * tanh(log(exp(sum5[1]) + afp(1.f))); - sum6[0] = sum6[0] * tanh(log(exp(sum6[0]) + afp(1.f))); - sum6[1] = sum6[1] * tanh(log(exp(sum6[1]) + afp(1.f))); - sum7[0] = sum7[0] * tanh(log(exp(sum7[0]) + afp(1.f))); - sum7[1] = sum7[1] * tanh(log(exp(sum7[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4[0] = sum4[0] * clamp(sum4[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4[1] = sum4[1] * clamp(sum4[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5[0] = sum5[0] * clamp(sum5[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - 
sum5[1] = sum5[1] * clamp(sum5[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6[0] = sum6[0] * clamp(sum6[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6[1] = sum6[1] * clamp(sum6[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7[0] = sum7[0] * clamp(sum7[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7[1] = sum7[1] * clamp(sum7[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); + sum4 = activation_afpvec8(sum4, activation_type, activation_param_0, activation_param_1); + sum5 = activation_afpvec8(sum5, activation_type, activation_param_0, activation_param_1); + sum6 = activation_afpvec8(sum6, activation_type, activation_param_0, activation_param_1); + sum7 = activation_afpvec8(sum7, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack1to8_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack1to8_1x1s1d1.comp index 847d37091..e237b22c8 100644 --- a/src/layer/vulkan/shader/convolution_pack1to8_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack1to8_1x1s1d1.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -160,77 +163,10 @@ void main() } #endif - if (activation_type == 1) - { - 
sum0[0] = max(sum0[0], afp(0.f)); - sum0[1] = max(sum0[1], afp(0.f)); - sum1[0] = max(sum1[0], afp(0.f)); - sum1[1] = max(sum1[1], afp(0.f)); - sum2[0] = max(sum2[0], afp(0.f)); - sum2[1] = max(sum2[1], afp(0.f)); - sum3[0] = max(sum3[0], afp(0.f)); - sum3[1] = max(sum3[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); - sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); - sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); - sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); - sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); - sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); - sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); - sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0[0] = clamp(sum0[0], const_min, const_max); - sum0[1] = clamp(sum0[1], const_min, const_max); - sum1[0] = clamp(sum1[0], const_min, const_max); - sum1[1] = clamp(sum1[1], const_min, const_max); - sum2[0] = clamp(sum2[0], const_min, const_max); - sum2[1] = clamp(sum2[1], const_min, const_max); - sum3[0] = clamp(sum3[0], const_min, const_max); - sum3[1] = clamp(sum3[1], const_min, const_max); - } - if (activation_type == 4) - { - sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); - sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); - sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); - sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); - sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); - sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); - sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); - sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); - } - 
if (activation_type == 5) - { - sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); - sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); - sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); - sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); - sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); - sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); - sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); - sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack1to8_gemm.comp b/src/layer/vulkan/shader/convolution_pack1to8_gemm.comp index cd95ab09c..4a725a671 100644 --- a/src/layer/vulkan/shader/convolution_pack1to8_gemm.comp +++ 
b/src/layer/vulkan/shader/convolution_pack1to8_gemm.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -183,77 +186,10 @@ void main() } #endif - if (activation_type == 1) - { - sum0[0] = max(sum0[0], afp(0.f)); - sum0[1] = max(sum0[1], afp(0.f)); - sum1[0] = max(sum1[0], afp(0.f)); - sum1[1] = max(sum1[1], afp(0.f)); - sum2[0] = max(sum2[0], afp(0.f)); - sum2[1] = max(sum2[1], afp(0.f)); - sum3[0] = max(sum3[0], afp(0.f)); - sum3[1] = max(sum3[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); - sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); - sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); - sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); - sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); - sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); - sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); - sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0[0] = clamp(sum0[0], const_min, const_max); - sum0[1] = clamp(sum0[1], const_min, const_max); - sum1[0] = clamp(sum1[0], const_min, const_max); - sum1[1] = clamp(sum1[1], const_min, const_max); - sum2[0] = clamp(sum2[0], const_min, const_max); - sum2[1] = clamp(sum2[1], const_min, const_max); - sum3[0] = clamp(sum3[0], const_min, 
const_max); - sum3[1] = clamp(sum3[1], const_min, const_max); - } - if (activation_type == 4) - { - sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); - sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); - sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); - sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); - sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); - sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); - sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); - sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); - } - if (activation_type == 5) - { - sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); - sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); - sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); - sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); - sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); - sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); - sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); - sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = 
activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack4.comp b/src/layer/vulkan/shader/convolution_pack4.comp index de7c1257f..7fd8e6b62 100644 --- a/src/layer/vulkan/shader/convolution_pack4.comp +++ b/src/layer/vulkan/shader/convolution_pack4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -226,77 +229,14 @@ void main() } #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - sum4 = max(sum4, afp(0.f)); - sum5 = max(sum5, afp(0.f)); - sum6 = max(sum6, afp(0.f)); - sum7 = max(sum7, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - sum4 = mix(sum4, sum4 * afp(slope), lessThan(sum4, afpvec4(0.f))); - sum5 = mix(sum5, sum5 * afp(slope), lessThan(sum5, afpvec4(0.f))); - sum6 = mix(sum6, sum6 * afp(slope), lessThan(sum6, afpvec4(0.f))); - sum7 = mix(sum7, sum7 * afp(slope), lessThan(sum7, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - 
sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - sum4 = clamp(sum4, const_min, const_max); - sum5 = clamp(sum5, const_min, const_max); - sum6 = clamp(sum6, const_min, const_max); - sum7 = clamp(sum7, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); - sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); - sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); - sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); - sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); - sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); - sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7 = sum7 * clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = 
activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); + sum4 = activation_afpvec4(sum4, activation_type, activation_param_0, activation_param_1); + sum5 = activation_afpvec4(sum5, activation_type, activation_param_0, activation_param_1); + sum6 = activation_afpvec4(sum6, activation_type, activation_param_0, activation_param_1); + sum7 = activation_afpvec4(sum7, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp index e377191f4..cf62cdbe3 100644 --- a/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + #define LOCAL_MEMORY_UNROLL_INCH 8 layout (constant_id = 0) const int bias_term = 0; @@ -257,53 +260,10 @@ void main() #endif #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = 
clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack4_1x1s1d1_cm_16_8_8.comp b/src/layer/vulkan/shader/convolution_pack4_1x1s1d1_cm_16_8_8.comp index 22d79616d..3d9c5d442 100644 --- a/src/layer/vulkan/shader/convolution_pack4_1x1s1d1_cm_16_8_8.comp +++ b/src/layer/vulkan/shader/convolution_pack4_1x1s1d1_cm_16_8_8.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + #extension GL_KHR_memory_scope_semantics: require #extension GL_EXT_shader_explicit_arithmetic_types: require #extension 
GL_EXT_shader_explicit_arithmetic_types_float16: require @@ -238,53 +241,10 @@ void main() afpvec4 sum2 = afpvec4(unpackHalf2x16(sum2_u2.x), unpackHalf2x16(sum2_u2.y)); afpvec4 sum3 = afpvec4(unpackHalf2x16(sum3_u2.x), unpackHalf2x16(sum3_u2.y)); - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec4(sum0, activation_type, 
activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); int gi = gy * psc(outcstep) + gx + lxd16 * psc(outcstep) + lxm16; { diff --git a/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp b/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp index b3db27bf8..a93a910af 100644 --- a/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp +++ b/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -149,53 +152,10 @@ void main() v11 = m11 - m12 + m13; } - if (activation_type == 1) - { - v00 = max(v00, afp(0.f)); - v10 = max(v10, afp(0.f)); - v01 = max(v01, afp(0.f)); - v11 = max(v11, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - v00 = mix(v00, v00 * afp(slope), lessThan(v00, afpvec4(0.f))); - v10 = mix(v10, v10 * afp(slope), lessThan(v10, afpvec4(0.f))); - v01 = mix(v01, v01 * afp(slope), lessThan(v01, afpvec4(0.f))); - v11 = mix(v11, v11 * afp(slope), lessThan(v11, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - v00 = clamp(v00, const_min, const_max); - v10 = clamp(v10, const_min, const_max); - v01 = clamp(v01, const_min, const_max); - v11 = clamp(v11, const_min, const_max); - } - if (activation_type == 
4) - { - v00 = afp(1.f) / (afp(1.f) + exp(-v00)); - v10 = afp(1.f) / (afp(1.f) + exp(-v10)); - v01 = afp(1.f) / (afp(1.f) + exp(-v01)); - v11 = afp(1.f) / (afp(1.f) + exp(-v11)); - } - if (activation_type == 5) - { - v00 = v00 * tanh(log(exp(v00) + afp(1.f))); - v01 = v01 * tanh(log(exp(v01) + afp(1.f))); - v10 = v10 * tanh(log(exp(v10) + afp(1.f))); - v11 = v11 * tanh(log(exp(v11) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - v00 = v00 * clamp(v00 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v01 = v01 * clamp(v01 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v10 = v10 * clamp(v10 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v11 = v11 * clamp(v11 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + v00 = activation_afpvec4(v00, activation_type, activation_param_0, activation_param_1); + v01 = activation_afpvec4(v01, activation_type, activation_param_0, activation_param_1); + v10 = activation_afpvec4(v10, activation_type, activation_param_0, activation_param_1); + v11 = activation_afpvec4(v11, activation_type, activation_param_0, activation_param_1); // store 2x2 int x = gx * 2; diff --git a/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd43_transform_output.comp b/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd43_transform_output.comp index 0274b2362..dc1cdf337 100644 --- a/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd43_transform_output.comp +++ b/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd43_transform_output.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -237,125 +240,22 @@ void main() v33 = bias_value + v33; } 
- if (activation_type == 1) - { - v00 = max(v00, afp(0.f)); - v01 = max(v01, afp(0.f)); - v02 = max(v02, afp(0.f)); - v03 = max(v03, afp(0.f)); - v10 = max(v10, afp(0.f)); - v11 = max(v11, afp(0.f)); - v12 = max(v12, afp(0.f)); - v13 = max(v13, afp(0.f)); - v20 = max(v20, afp(0.f)); - v21 = max(v21, afp(0.f)); - v22 = max(v22, afp(0.f)); - v23 = max(v23, afp(0.f)); - v30 = max(v30, afp(0.f)); - v31 = max(v31, afp(0.f)); - v32 = max(v32, afp(0.f)); - v33 = max(v33, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - v00 = mix(v00, v00 * afp(slope), lessThan(v00, afpvec4(0.f))); - v01 = mix(v01, v01 * afp(slope), lessThan(v01, afpvec4(0.f))); - v02 = mix(v02, v02 * afp(slope), lessThan(v02, afpvec4(0.f))); - v03 = mix(v03, v03 * afp(slope), lessThan(v03, afpvec4(0.f))); - v10 = mix(v10, v10 * afp(slope), lessThan(v10, afpvec4(0.f))); - v11 = mix(v11, v11 * afp(slope), lessThan(v11, afpvec4(0.f))); - v12 = mix(v12, v12 * afp(slope), lessThan(v12, afpvec4(0.f))); - v13 = mix(v13, v13 * afp(slope), lessThan(v13, afpvec4(0.f))); - v20 = mix(v20, v20 * afp(slope), lessThan(v20, afpvec4(0.f))); - v21 = mix(v21, v21 * afp(slope), lessThan(v21, afpvec4(0.f))); - v22 = mix(v22, v22 * afp(slope), lessThan(v22, afpvec4(0.f))); - v23 = mix(v23, v23 * afp(slope), lessThan(v23, afpvec4(0.f))); - v30 = mix(v30, v30 * afp(slope), lessThan(v30, afpvec4(0.f))); - v31 = mix(v31, v31 * afp(slope), lessThan(v31, afpvec4(0.f))); - v32 = mix(v32, v32 * afp(slope), lessThan(v32, afpvec4(0.f))); - v33 = mix(v33, v33 * afp(slope), lessThan(v33, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - v00 = clamp(v00, const_min, const_max); - v01 = clamp(v01, const_min, const_max); - v02 = clamp(v02, const_min, const_max); - v03 = clamp(v03, const_min, const_max); - v10 = clamp(v10, const_min, const_max); - v11 = clamp(v11, const_min, const_max); - v12 = 
clamp(v12, const_min, const_max); - v13 = clamp(v13, const_min, const_max); - v20 = clamp(v20, const_min, const_max); - v21 = clamp(v21, const_min, const_max); - v22 = clamp(v22, const_min, const_max); - v23 = clamp(v23, const_min, const_max); - v30 = clamp(v30, const_min, const_max); - v31 = clamp(v31, const_min, const_max); - v32 = clamp(v32, const_min, const_max); - v33 = clamp(v33, const_min, const_max); - } - if (activation_type == 4) - { - v00 = afp(1.f) / (afp(1.f) + exp(-v00)); - v01 = afp(1.f) / (afp(1.f) + exp(-v01)); - v02 = afp(1.f) / (afp(1.f) + exp(-v02)); - v03 = afp(1.f) / (afp(1.f) + exp(-v03)); - v10 = afp(1.f) / (afp(1.f) + exp(-v10)); - v11 = afp(1.f) / (afp(1.f) + exp(-v11)); - v12 = afp(1.f) / (afp(1.f) + exp(-v12)); - v13 = afp(1.f) / (afp(1.f) + exp(-v13)); - v20 = afp(1.f) / (afp(1.f) + exp(-v20)); - v21 = afp(1.f) / (afp(1.f) + exp(-v21)); - v22 = afp(1.f) / (afp(1.f) + exp(-v22)); - v23 = afp(1.f) / (afp(1.f) + exp(-v23)); - v30 = afp(1.f) / (afp(1.f) + exp(-v30)); - v31 = afp(1.f) / (afp(1.f) + exp(-v31)); - v32 = afp(1.f) / (afp(1.f) + exp(-v32)); - v33 = afp(1.f) / (afp(1.f) + exp(-v33)); - } - if (activation_type == 5) - { - v00 = v00 * tanh(log(exp(v00) + afp(1.f))); - v01 = v01 * tanh(log(exp(v01) + afp(1.f))); - v02 = v02 * tanh(log(exp(v02) + afp(1.f))); - v03 = v03 * tanh(log(exp(v03) + afp(1.f))); - v10 = v10 * tanh(log(exp(v10) + afp(1.f))); - v11 = v11 * tanh(log(exp(v11) + afp(1.f))); - v12 = v12 * tanh(log(exp(v12) + afp(1.f))); - v13 = v13 * tanh(log(exp(v13) + afp(1.f))); - v20 = v20 * tanh(log(exp(v20) + afp(1.f))); - v21 = v21 * tanh(log(exp(v21) + afp(1.f))); - v22 = v22 * tanh(log(exp(v22) + afp(1.f))); - v23 = v23 * tanh(log(exp(v23) + afp(1.f))); - v30 = v30 * tanh(log(exp(v30) + afp(1.f))); - v31 = v31 * tanh(log(exp(v31) + afp(1.f))); - v32 = v32 * tanh(log(exp(v32) + afp(1.f))); - v33 = v33 * tanh(log(exp(v33) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const 
afp beta = afp(activation_param_1); - v00 = v00 * clamp(v00 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v01 = v01 * clamp(v01 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v02 = v02 * clamp(v02 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v03 = v03 * clamp(v03 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v10 = v10 * clamp(v10 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v11 = v11 * clamp(v11 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v12 = v12 * clamp(v12 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v13 = v13 * clamp(v13 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v20 = v20 * clamp(v20 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v21 = v21 * clamp(v21 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v22 = v22 * clamp(v22 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v23 = v23 * clamp(v23 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v30 = v30 * clamp(v30 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v31 = v31 * clamp(v31 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v32 = v32 * clamp(v32 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v33 = v33 * clamp(v33 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + v00 = activation_afpvec4(v00, activation_type, activation_param_0, activation_param_1); + v01 = activation_afpvec4(v01, activation_type, activation_param_0, activation_param_1); + v02 = activation_afpvec4(v02, activation_type, activation_param_0, activation_param_1); + v03 = activation_afpvec4(v03, activation_type, activation_param_0, activation_param_1); + v10 = activation_afpvec4(v10, activation_type, activation_param_0, activation_param_1); + v11 = activation_afpvec4(v11, activation_type, activation_param_0, activation_param_1); + v12 = activation_afpvec4(v12, activation_type, activation_param_0, activation_param_1); + v13 = activation_afpvec4(v13, activation_type, activation_param_0, activation_param_1); + v20 = activation_afpvec4(v20, activation_type, activation_param_0, activation_param_1); + v21 = 
activation_afpvec4(v21, activation_type, activation_param_0, activation_param_1); + v22 = activation_afpvec4(v22, activation_type, activation_param_0, activation_param_1); + v23 = activation_afpvec4(v23, activation_type, activation_param_0, activation_param_1); + v30 = activation_afpvec4(v30, activation_type, activation_param_0, activation_param_1); + v31 = activation_afpvec4(v31, activation_type, activation_param_0, activation_param_1); + v32 = activation_afpvec4(v32, activation_type, activation_param_0, activation_param_1); + v33 = activation_afpvec4(v33, activation_type, activation_param_0, activation_param_1); // store 4x4 int x = gx * 4; diff --git a/src/layer/vulkan/shader/convolution_pack4_gemm.comp b/src/layer/vulkan/shader/convolution_pack4_gemm.comp index e2c105629..b699c7996 100644 --- a/src/layer/vulkan/shader/convolution_pack4_gemm.comp +++ b/src/layer/vulkan/shader/convolution_pack4_gemm.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + #define LOCAL_MEMORY_UNROLL_INCH 8 layout (constant_id = 0) const int kernel_w = 1; @@ -300,53 +303,10 @@ void main() return; #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, 
const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack4_gemm_cm_16_8_8.comp b/src/layer/vulkan/shader/convolution_pack4_gemm_cm_16_8_8.comp index 020b79c82..80a0463a9 100644 --- a/src/layer/vulkan/shader/convolution_pack4_gemm_cm_16_8_8.comp +++ b/src/layer/vulkan/shader/convolution_pack4_gemm_cm_16_8_8.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + #extension GL_KHR_memory_scope_semantics: require #extension GL_EXT_shader_explicit_arithmetic_types: require #extension 
GL_EXT_shader_explicit_arithmetic_types_float16: require @@ -275,53 +278,10 @@ void main() afpvec4 sum2 = afpvec4(unpackHalf2x16(sum2_u2.x), unpackHalf2x16(sum2_u2.y)); afpvec4 sum3 = afpvec4(unpackHalf2x16(sum3_u2.x), unpackHalf2x16(sum3_u2.y)); - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec4(sum0, activation_type, 
activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); int gi = gy * psc(outcstep) + gx + lxd16 * psc(outcstep) + lxm16; { diff --git a/src/layer/vulkan/shader/convolution_pack4to1.comp b/src/layer/vulkan/shader/convolution_pack4to1.comp index e492097e0..c800e15ac 100644 --- a/src/layer/vulkan/shader/convolution_pack4to1.comp +++ b/src/layer/vulkan/shader/convolution_pack4to1.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -195,77 +198,14 @@ void main() } #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - sum4 = max(sum4, afp(0.f)); - sum5 = max(sum5, afp(0.f)); - sum6 = max(sum6, afp(0.f)); - sum7 = max(sum7, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; - sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; - sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; - sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; - sum4 = sum4 < afp(0.f) ? sum4 * slope : sum4; - sum5 = sum5 < afp(0.f) ? sum5 * slope : sum5; - sum6 = sum6 < afp(0.f) ? sum6 * slope : sum6; - sum7 = sum7 < afp(0.f) ? 
sum7 * slope : sum7; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - sum4 = clamp(sum4, const_min, const_max); - sum5 = clamp(sum5, const_min, const_max); - sum6 = clamp(sum6, const_min, const_max); - sum7 = clamp(sum7, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); - sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); - sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); - sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); - sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); - sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); - sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7 = sum7 * 
clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); + sum4 = activation_afp(sum4, activation_type, activation_param_0, activation_param_1); + sum5 = activation_afp(sum5, activation_type, activation_param_0, activation_param_1); + sum6 = activation_afp(sum6, activation_type, activation_param_0, activation_param_1); + sum7 = activation_afp(sum7, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack4to1_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack4to1_1x1s1d1.comp index b040af642..555ec83c7 100644 --- a/src/layer/vulkan/shader/convolution_pack4to1_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack4to1_1x1s1d1.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + #define LOCAL_MEMORY_UNROLL_INCH 8 layout (constant_id = 0) const int bias_term = 0; @@ -237,53 +240,10 @@ void main() #endif #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; - sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; - sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; - sum3 = sum3 < afp(0.f) ? 
sum3 * slope : sum3; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack4to1_gemm.comp b/src/layer/vulkan/shader/convolution_pack4to1_gemm.comp index 6d780222b..f769d1a68 100644 --- a/src/layer/vulkan/shader/convolution_pack4to1_gemm.comp +++ b/src/layer/vulkan/shader/convolution_pack4to1_gemm.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require 
#endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + #define LOCAL_MEMORY_UNROLL_INCH 8 layout (constant_id = 0) const int kernel_w = 1; @@ -280,53 +283,10 @@ void main() return; #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; - sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; - sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; - sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afp(sum1, activation_type, activation_param_0, 
activation_param_1); + sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack4to8.comp b/src/layer/vulkan/shader/convolution_pack4to8.comp index b83e312ac..8b6fb7b82 100644 --- a/src/layer/vulkan/shader/convolution_pack4to8.comp +++ b/src/layer/vulkan/shader/convolution_pack4to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -342,125 +345,14 @@ void main() } #endif - if (activation_type == 1) - { - sum0[0] = max(sum0[0], afp(0.f)); - sum0[1] = max(sum0[1], afp(0.f)); - sum1[0] = max(sum1[0], afp(0.f)); - sum1[1] = max(sum1[1], afp(0.f)); - sum2[0] = max(sum2[0], afp(0.f)); - sum2[1] = max(sum2[1], afp(0.f)); - sum3[0] = max(sum3[0], afp(0.f)); - sum3[1] = max(sum3[1], afp(0.f)); - sum4[0] = max(sum4[0], afp(0.f)); - sum4[1] = max(sum4[1], afp(0.f)); - sum5[0] = max(sum5[0], afp(0.f)); - sum5[1] = max(sum5[1], afp(0.f)); - sum6[0] = max(sum6[0], afp(0.f)); - sum6[1] = max(sum6[1], afp(0.f)); - sum7[0] = max(sum7[0], afp(0.f)); - sum7[1] = max(sum7[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); - sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); - sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); - sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); - sum2[0] = mix(sum2[0], sum2[0] 
* afp(slope), lessThan(sum2[0], afpvec4(0.f))); - sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); - sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); - sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); - sum4[0] = mix(sum4[0], sum4[0] * afp(slope), lessThan(sum4[0], afpvec4(0.f))); - sum4[1] = mix(sum4[1], sum4[1] * afp(slope), lessThan(sum4[1], afpvec4(0.f))); - sum5[0] = mix(sum5[0], sum5[0] * afp(slope), lessThan(sum5[0], afpvec4(0.f))); - sum5[1] = mix(sum5[1], sum5[1] * afp(slope), lessThan(sum5[1], afpvec4(0.f))); - sum6[0] = mix(sum6[0], sum6[0] * afp(slope), lessThan(sum6[0], afpvec4(0.f))); - sum6[1] = mix(sum6[1], sum6[1] * afp(slope), lessThan(sum6[1], afpvec4(0.f))); - sum7[0] = mix(sum7[0], sum7[0] * afp(slope), lessThan(sum7[0], afpvec4(0.f))); - sum7[1] = mix(sum7[1], sum7[1] * afp(slope), lessThan(sum7[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0[0] = clamp(sum0[0], const_min, const_max); - sum0[1] = clamp(sum0[1], const_min, const_max); - sum1[0] = clamp(sum1[0], const_min, const_max); - sum1[1] = clamp(sum1[1], const_min, const_max); - sum2[0] = clamp(sum2[0], const_min, const_max); - sum2[1] = clamp(sum2[1], const_min, const_max); - sum3[0] = clamp(sum3[0], const_min, const_max); - sum3[1] = clamp(sum3[1], const_min, const_max); - sum4[0] = clamp(sum4[0], const_min, const_max); - sum4[1] = clamp(sum4[1], const_min, const_max); - sum5[0] = clamp(sum5[0], const_min, const_max); - sum5[1] = clamp(sum5[1], const_min, const_max); - sum6[0] = clamp(sum6[0], const_min, const_max); - sum6[1] = clamp(sum6[1], const_min, const_max); - sum7[0] = clamp(sum7[0], const_min, const_max); - sum7[1] = clamp(sum7[1], const_min, const_max); - } - if (activation_type == 4) - { - sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); - sum0[1] = afp(1.f) / (afp(1.f) + 
exp(-sum0[1])); - sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); - sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); - sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); - sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); - sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); - sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); - sum4[0] = afp(1.f) / (afp(1.f) + exp(-sum4[0])); - sum4[1] = afp(1.f) / (afp(1.f) + exp(-sum4[1])); - sum5[0] = afp(1.f) / (afp(1.f) + exp(-sum5[0])); - sum5[1] = afp(1.f) / (afp(1.f) + exp(-sum5[1])); - sum6[0] = afp(1.f) / (afp(1.f) + exp(-sum6[0])); - sum6[1] = afp(1.f) / (afp(1.f) + exp(-sum6[1])); - sum7[0] = afp(1.f) / (afp(1.f) + exp(-sum7[0])); - sum7[1] = afp(1.f) / (afp(1.f) + exp(-sum7[1])); - } - if (activation_type == 5) - { - sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); - sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); - sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); - sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); - sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); - sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); - sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); - sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); - sum4[0] = sum4[0] * tanh(log(exp(sum4[0]) + afp(1.f))); - sum4[1] = sum4[1] * tanh(log(exp(sum4[1]) + afp(1.f))); - sum5[0] = sum5[0] * tanh(log(exp(sum5[0]) + afp(1.f))); - sum5[1] = sum5[1] * tanh(log(exp(sum5[1]) + afp(1.f))); - sum6[0] = sum6[0] * tanh(log(exp(sum6[0]) + afp(1.f))); - sum6[1] = sum6[1] * tanh(log(exp(sum6[1]) + afp(1.f))); - sum7[0] = sum7[0] * tanh(log(exp(sum7[0]) + afp(1.f))); - sum7[1] = sum7[1] * tanh(log(exp(sum7[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[0] = 
sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4[0] = sum4[0] * clamp(sum4[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4[1] = sum4[1] * clamp(sum4[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5[0] = sum5[0] * clamp(sum5[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5[1] = sum5[1] * clamp(sum5[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6[0] = sum6[0] * clamp(sum6[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6[1] = sum6[1] * clamp(sum6[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7[0] = sum7[0] * clamp(sum7[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7[1] = sum7[1] * clamp(sum7[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); + sum4 = activation_afpvec8(sum4, activation_type, activation_param_0, activation_param_1); + sum5 = activation_afpvec8(sum5, activation_type, activation_param_0, activation_param_1); + sum6 = activation_afpvec8(sum6, activation_type, activation_param_0, activation_param_1); + sum7 = activation_afpvec8(sum7, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); diff --git 
a/src/layer/vulkan/shader/convolution_pack4to8_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack4to8_1x1s1d1.comp index 8c2375866..fe6d0bf7d 100644 --- a/src/layer/vulkan/shader/convolution_pack4to8_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack4to8_1x1s1d1.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -222,77 +225,10 @@ void main() } #endif - if (activation_type == 1) - { - sum0[0] = max(sum0[0], afp(0.f)); - sum0[1] = max(sum0[1], afp(0.f)); - sum1[0] = max(sum1[0], afp(0.f)); - sum1[1] = max(sum1[1], afp(0.f)); - sum2[0] = max(sum2[0], afp(0.f)); - sum2[1] = max(sum2[1], afp(0.f)); - sum3[0] = max(sum3[0], afp(0.f)); - sum3[1] = max(sum3[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); - sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); - sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); - sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); - sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); - sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); - sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); - sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0[0] = clamp(sum0[0], const_min, const_max); - sum0[1] = clamp(sum0[1], const_min, 
const_max); - sum1[0] = clamp(sum1[0], const_min, const_max); - sum1[1] = clamp(sum1[1], const_min, const_max); - sum2[0] = clamp(sum2[0], const_min, const_max); - sum2[1] = clamp(sum2[1], const_min, const_max); - sum3[0] = clamp(sum3[0], const_min, const_max); - sum3[1] = clamp(sum3[1], const_min, const_max); - } - if (activation_type == 4) - { - sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); - sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); - sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); - sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); - sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); - sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); - sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); - sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); - } - if (activation_type == 5) - { - sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); - sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); - sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); - sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); - sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); - sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); - sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); - sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[1] = sum3[1] * clamp(sum3[1] * 
afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack4to8_gemm.comp b/src/layer/vulkan/shader/convolution_pack4to8_gemm.comp index f8b87d9c7..b8e8f6d47 100644 --- a/src/layer/vulkan/shader/convolution_pack4to8_gemm.comp +++ b/src/layer/vulkan/shader/convolution_pack4to8_gemm.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -245,77 +248,10 @@ void main() } #endif - if (activation_type == 1) - { - sum0[0] = max(sum0[0], afp(0.f)); - sum0[1] = max(sum0[1], afp(0.f)); - sum1[0] = max(sum1[0], afp(0.f)); - sum1[1] = max(sum1[1], afp(0.f)); - sum2[0] = max(sum2[0], afp(0.f)); - sum2[1] = max(sum2[1], afp(0.f)); - sum3[0] = max(sum3[0], afp(0.f)); - sum3[1] = max(sum3[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); - sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); - sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); - sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); - sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); - 
sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); - sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); - sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0[0] = clamp(sum0[0], const_min, const_max); - sum0[1] = clamp(sum0[1], const_min, const_max); - sum1[0] = clamp(sum1[0], const_min, const_max); - sum1[1] = clamp(sum1[1], const_min, const_max); - sum2[0] = clamp(sum2[0], const_min, const_max); - sum2[1] = clamp(sum2[1], const_min, const_max); - sum3[0] = clamp(sum3[0], const_min, const_max); - sum3[1] = clamp(sum3[1], const_min, const_max); - } - if (activation_type == 4) - { - sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); - sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); - sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); - sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); - sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); - sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); - sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); - sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); - } - if (activation_type == 5) - { - sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); - sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); - sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); - sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); - sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); - sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); - sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); - sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), 
afp(1.f)); - sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack8.comp b/src/layer/vulkan/shader/convolution_pack8.comp index 7f35988a6..e3ece2c07 100644 --- a/src/layer/vulkan/shader/convolution_pack8.comp +++ b/src/layer/vulkan/shader/convolution_pack8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -342,125 +345,14 @@ void main() } #endif - if (activation_type == 1) - { - sum0[0] = max(sum0[0], afp(0.f)); - sum0[1] = max(sum0[1], afp(0.f)); - sum1[0] = max(sum1[0], afp(0.f)); - sum1[1] = max(sum1[1], afp(0.f)); - sum2[0] = max(sum2[0], afp(0.f)); - sum2[1] = max(sum2[1], afp(0.f)); - sum3[0] = max(sum3[0], afp(0.f)); - sum3[1] = max(sum3[1], afp(0.f)); - sum4[0] = max(sum4[0], afp(0.f)); - sum4[1] = 
max(sum4[1], afp(0.f)); - sum5[0] = max(sum5[0], afp(0.f)); - sum5[1] = max(sum5[1], afp(0.f)); - sum6[0] = max(sum6[0], afp(0.f)); - sum6[1] = max(sum6[1], afp(0.f)); - sum7[0] = max(sum7[0], afp(0.f)); - sum7[1] = max(sum7[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); - sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); - sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); - sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); - sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); - sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); - sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); - sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); - sum4[0] = mix(sum4[0], sum4[0] * afp(slope), lessThan(sum4[0], afpvec4(0.f))); - sum4[1] = mix(sum4[1], sum4[1] * afp(slope), lessThan(sum4[1], afpvec4(0.f))); - sum5[0] = mix(sum5[0], sum5[0] * afp(slope), lessThan(sum5[0], afpvec4(0.f))); - sum5[1] = mix(sum5[1], sum5[1] * afp(slope), lessThan(sum5[1], afpvec4(0.f))); - sum6[0] = mix(sum6[0], sum6[0] * afp(slope), lessThan(sum6[0], afpvec4(0.f))); - sum6[1] = mix(sum6[1], sum6[1] * afp(slope), lessThan(sum6[1], afpvec4(0.f))); - sum7[0] = mix(sum7[0], sum7[0] * afp(slope), lessThan(sum7[0], afpvec4(0.f))); - sum7[1] = mix(sum7[1], sum7[1] * afp(slope), lessThan(sum7[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0[0] = clamp(sum0[0], const_min, const_max); - sum0[1] = clamp(sum0[1], const_min, const_max); - sum1[0] = clamp(sum1[0], const_min, const_max); - sum1[1] = clamp(sum1[1], const_min, const_max); - sum2[0] = clamp(sum2[0], const_min, 
const_max); - sum2[1] = clamp(sum2[1], const_min, const_max); - sum3[0] = clamp(sum3[0], const_min, const_max); - sum3[1] = clamp(sum3[1], const_min, const_max); - sum4[0] = clamp(sum4[0], const_min, const_max); - sum4[1] = clamp(sum4[1], const_min, const_max); - sum5[0] = clamp(sum5[0], const_min, const_max); - sum5[1] = clamp(sum5[1], const_min, const_max); - sum6[0] = clamp(sum6[0], const_min, const_max); - sum6[1] = clamp(sum6[1], const_min, const_max); - sum7[0] = clamp(sum7[0], const_min, const_max); - sum7[1] = clamp(sum7[1], const_min, const_max); - } - if (activation_type == 4) - { - sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); - sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); - sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); - sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); - sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); - sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); - sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); - sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); - sum4[0] = afp(1.f) / (afp(1.f) + exp(-sum4[0])); - sum4[1] = afp(1.f) / (afp(1.f) + exp(-sum4[1])); - sum5[0] = afp(1.f) / (afp(1.f) + exp(-sum5[0])); - sum5[1] = afp(1.f) / (afp(1.f) + exp(-sum5[1])); - sum6[0] = afp(1.f) / (afp(1.f) + exp(-sum6[0])); - sum6[1] = afp(1.f) / (afp(1.f) + exp(-sum6[1])); - sum7[0] = afp(1.f) / (afp(1.f) + exp(-sum7[0])); - sum7[1] = afp(1.f) / (afp(1.f) + exp(-sum7[1])); - } - if (activation_type == 5) - { - sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); - sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); - sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); - sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); - sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); - sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); - sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); - sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); - sum4[0] = sum4[0] * tanh(log(exp(sum4[0]) + afp(1.f))); - sum4[1] = sum4[1] * 
tanh(log(exp(sum4[1]) + afp(1.f))); - sum5[0] = sum5[0] * tanh(log(exp(sum5[0]) + afp(1.f))); - sum5[1] = sum5[1] * tanh(log(exp(sum5[1]) + afp(1.f))); - sum6[0] = sum6[0] * tanh(log(exp(sum6[0]) + afp(1.f))); - sum6[1] = sum6[1] * tanh(log(exp(sum6[1]) + afp(1.f))); - sum7[0] = sum7[0] * tanh(log(exp(sum7[0]) + afp(1.f))); - sum7[1] = sum7[1] * tanh(log(exp(sum7[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4[0] = sum4[0] * clamp(sum4[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4[1] = sum4[1] * clamp(sum4[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5[0] = sum5[0] * clamp(sum5[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5[1] = sum5[1] * clamp(sum5[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6[0] = sum6[0] * clamp(sum6[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6[1] = sum6[1] * clamp(sum6[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7[0] = sum7[0] * clamp(sum7[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7[1] = sum7[1] * clamp(sum7[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec8(sum1, activation_type, 
activation_param_0, activation_param_1); + sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); + sum4 = activation_afpvec8(sum4, activation_type, activation_param_0, activation_param_1); + sum5 = activation_afpvec8(sum5, activation_type, activation_param_0, activation_param_1); + sum6 = activation_afpvec8(sum6, activation_type, activation_param_0, activation_param_1); + sum7 = activation_afpvec8(sum7, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp index e74bc8424..0ca36f025 100644 --- a/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -222,77 +225,10 @@ void main() } #endif - if (activation_type == 1) - { - sum0[0] = max(sum0[0], afp(0.f)); - sum0[1] = max(sum0[1], afp(0.f)); - sum1[0] = max(sum1[0], afp(0.f)); - sum1[1] = max(sum1[1], afp(0.f)); - sum2[0] = max(sum2[0], afp(0.f)); - sum2[1] = max(sum2[1], afp(0.f)); - sum3[0] = max(sum3[0], afp(0.f)); - sum3[1] = max(sum3[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); - sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); - sum1[0] = mix(sum1[0], sum1[0] * afp(slope), 
lessThan(sum1[0], afpvec4(0.f))); - sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); - sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); - sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); - sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); - sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0[0] = clamp(sum0[0], const_min, const_max); - sum0[1] = clamp(sum0[1], const_min, const_max); - sum1[0] = clamp(sum1[0], const_min, const_max); - sum1[1] = clamp(sum1[1], const_min, const_max); - sum2[0] = clamp(sum2[0], const_min, const_max); - sum2[1] = clamp(sum2[1], const_min, const_max); - sum3[0] = clamp(sum3[0], const_min, const_max); - sum3[1] = clamp(sum3[1], const_min, const_max); - } - if (activation_type == 4) - { - sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); - sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); - sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); - sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); - sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); - sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); - sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); - sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); - } - if (activation_type == 5) - { - sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); - sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); - sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); - sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); - sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); - sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); - sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); - sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - 
const afp beta = afp(activation_param_1); - sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp b/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp index 245bc3d15..3fcfd47e2 100644 --- a/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp +++ b/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -150,77 +153,10 @@ void main() v11 = m11 - m12 + m13; } 
- if (activation_type == 1) - { - v00[0] = max(v00[0], afp(0.f)); - v00[1] = max(v00[1], afp(0.f)); - v10[0] = max(v10[0], afp(0.f)); - v10[1] = max(v10[1], afp(0.f)); - v01[0] = max(v01[0], afp(0.f)); - v01[1] = max(v01[1], afp(0.f)); - v11[0] = max(v11[0], afp(0.f)); - v11[1] = max(v11[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - v00[0] = mix(v00[0], v00[0] * afp(slope), lessThan(v00[0], afpvec4(0.f))); - v00[1] = mix(v00[1], v00[1] * afp(slope), lessThan(v00[1], afpvec4(0.f))); - v10[0] = mix(v10[0], v10[0] * afp(slope), lessThan(v10[0], afpvec4(0.f))); - v10[1] = mix(v10[1], v10[1] * afp(slope), lessThan(v10[1], afpvec4(0.f))); - v01[0] = mix(v01[0], v01[0] * afp(slope), lessThan(v01[0], afpvec4(0.f))); - v01[1] = mix(v01[1], v01[1] * afp(slope), lessThan(v01[1], afpvec4(0.f))); - v11[0] = mix(v11[0], v11[0] * afp(slope), lessThan(v11[0], afpvec4(0.f))); - v11[1] = mix(v11[1], v11[1] * afp(slope), lessThan(v11[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - v00[0] = clamp(v00[0], const_min, const_max); - v00[1] = clamp(v00[1], const_min, const_max); - v10[0] = clamp(v10[0], const_min, const_max); - v10[1] = clamp(v10[1], const_min, const_max); - v01[0] = clamp(v01[0], const_min, const_max); - v01[1] = clamp(v01[1], const_min, const_max); - v11[0] = clamp(v11[0], const_min, const_max); - v11[1] = clamp(v11[1], const_min, const_max); - } - if (activation_type == 4) - { - v00[0] = afp(1.f) / (afp(1.f) + exp(-v00[0])); - v00[1] = afp(1.f) / (afp(1.f) + exp(-v00[1])); - v10[0] = afp(1.f) / (afp(1.f) + exp(-v10[0])); - v10[1] = afp(1.f) / (afp(1.f) + exp(-v10[1])); - v01[0] = afp(1.f) / (afp(1.f) + exp(-v01[0])); - v01[1] = afp(1.f) / (afp(1.f) + exp(-v01[1])); - v11[0] = afp(1.f) / (afp(1.f) + exp(-v11[0])); - v11[1] = afp(1.f) / (afp(1.f) + exp(-v11[1])); - } - if (activation_type == 5) - { - v00[0] = 
v00[0] * tanh(log(exp(v00[0]) + afp(1.f))); - v00[1] = v00[1] * tanh(log(exp(v00[1]) + afp(1.f))); - v10[0] = v10[0] * tanh(log(exp(v10[0]) + afp(1.f))); - v10[1] = v10[1] * tanh(log(exp(v10[1]) + afp(1.f))); - v01[0] = v01[0] * tanh(log(exp(v01[0]) + afp(1.f))); - v01[1] = v01[1] * tanh(log(exp(v01[1]) + afp(1.f))); - v11[0] = v11[0] * tanh(log(exp(v11[0]) + afp(1.f))); - v11[1] = v11[1] * tanh(log(exp(v11[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - v00[0] = v00[0] * clamp(v00[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v00[1] = v00[1] * clamp(v00[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v01[0] = v01[0] * clamp(v01[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v01[1] = v01[1] * clamp(v01[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v10[0] = v10[0] * clamp(v10[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v10[1] = v10[1] * clamp(v10[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v11[0] = v11[0] * clamp(v11[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v11[1] = v11[1] * clamp(v11[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + v00 = activation_afpvec8(v00, activation_type, activation_param_0, activation_param_1); + v01 = activation_afpvec8(v01, activation_type, activation_param_0, activation_param_1); + v10 = activation_afpvec8(v10, activation_type, activation_param_0, activation_param_1); + v11 = activation_afpvec8(v11, activation_type, activation_param_0, activation_param_1); // store 2x2 int x = gx * 2; diff --git a/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd43_transform_output.comp b/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd43_transform_output.comp index 8fd80bad5..d04b2e4cf 100644 --- a/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd43_transform_output.comp +++ b/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd43_transform_output.comp @@ -22,6 +22,9 @@ 
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -238,221 +241,22 @@ void main() v33 = bias_value + v33; } - if (activation_type == 1) - { - v00[0] = max(v00[0], afp(0.f)); - v00[1] = max(v00[1], afp(0.f)); - v01[0] = max(v01[0], afp(0.f)); - v01[1] = max(v01[1], afp(0.f)); - v02[0] = max(v02[0], afp(0.f)); - v02[1] = max(v02[1], afp(0.f)); - v03[0] = max(v03[0], afp(0.f)); - v03[1] = max(v03[1], afp(0.f)); - v10[0] = max(v10[0], afp(0.f)); - v10[1] = max(v10[1], afp(0.f)); - v11[0] = max(v11[0], afp(0.f)); - v11[1] = max(v11[1], afp(0.f)); - v12[0] = max(v12[0], afp(0.f)); - v12[1] = max(v12[1], afp(0.f)); - v13[0] = max(v13[0], afp(0.f)); - v13[1] = max(v13[1], afp(0.f)); - v20[0] = max(v20[0], afp(0.f)); - v20[1] = max(v20[1], afp(0.f)); - v21[0] = max(v21[0], afp(0.f)); - v21[1] = max(v21[1], afp(0.f)); - v22[0] = max(v22[0], afp(0.f)); - v22[1] = max(v22[1], afp(0.f)); - v23[0] = max(v23[0], afp(0.f)); - v23[1] = max(v23[1], afp(0.f)); - v30[0] = max(v30[0], afp(0.f)); - v30[1] = max(v30[1], afp(0.f)); - v31[0] = max(v31[0], afp(0.f)); - v31[1] = max(v31[1], afp(0.f)); - v32[0] = max(v32[0], afp(0.f)); - v32[1] = max(v32[1], afp(0.f)); - v33[0] = max(v33[0], afp(0.f)); - v33[1] = max(v33[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - v00[0] = mix(v00[0], v00[0] * afp(slope), lessThan(v00[0], afpvec4(0.f))); - v00[1] = mix(v00[1], v00[1] * afp(slope), lessThan(v00[1], afpvec4(0.f))); - v01[0] = mix(v01[0], v01[0] * afp(slope), lessThan(v01[0], afpvec4(0.f))); - v01[1] = mix(v01[1], v01[1] * afp(slope), lessThan(v01[1], afpvec4(0.f))); - v02[0] = mix(v02[0], v02[0] * afp(slope), 
lessThan(v02[0], afpvec4(0.f))); - v02[1] = mix(v02[1], v02[1] * afp(slope), lessThan(v02[1], afpvec4(0.f))); - v03[0] = mix(v03[0], v03[0] * afp(slope), lessThan(v03[0], afpvec4(0.f))); - v03[1] = mix(v03[1], v03[1] * afp(slope), lessThan(v03[1], afpvec4(0.f))); - v10[0] = mix(v10[0], v10[0] * afp(slope), lessThan(v10[0], afpvec4(0.f))); - v10[1] = mix(v10[1], v10[1] * afp(slope), lessThan(v10[1], afpvec4(0.f))); - v11[0] = mix(v11[0], v11[0] * afp(slope), lessThan(v11[0], afpvec4(0.f))); - v11[1] = mix(v11[1], v11[1] * afp(slope), lessThan(v11[1], afpvec4(0.f))); - v12[0] = mix(v12[0], v12[0] * afp(slope), lessThan(v12[0], afpvec4(0.f))); - v12[1] = mix(v12[1], v12[1] * afp(slope), lessThan(v12[1], afpvec4(0.f))); - v13[0] = mix(v13[0], v13[0] * afp(slope), lessThan(v13[0], afpvec4(0.f))); - v13[1] = mix(v13[1], v13[1] * afp(slope), lessThan(v13[1], afpvec4(0.f))); - v20[0] = mix(v20[0], v20[0] * afp(slope), lessThan(v20[0], afpvec4(0.f))); - v20[1] = mix(v20[1], v20[1] * afp(slope), lessThan(v20[1], afpvec4(0.f))); - v21[0] = mix(v21[0], v21[0] * afp(slope), lessThan(v21[0], afpvec4(0.f))); - v21[1] = mix(v21[1], v21[1] * afp(slope), lessThan(v21[1], afpvec4(0.f))); - v22[0] = mix(v22[0], v22[0] * afp(slope), lessThan(v22[0], afpvec4(0.f))); - v22[1] = mix(v22[1], v22[1] * afp(slope), lessThan(v22[1], afpvec4(0.f))); - v23[0] = mix(v23[0], v23[0] * afp(slope), lessThan(v23[0], afpvec4(0.f))); - v23[1] = mix(v23[1], v23[1] * afp(slope), lessThan(v23[1], afpvec4(0.f))); - v30[0] = mix(v30[0], v30[0] * afp(slope), lessThan(v30[0], afpvec4(0.f))); - v30[1] = mix(v30[1], v30[1] * afp(slope), lessThan(v30[1], afpvec4(0.f))); - v31[0] = mix(v31[0], v31[0] * afp(slope), lessThan(v31[0], afpvec4(0.f))); - v31[1] = mix(v31[1], v31[1] * afp(slope), lessThan(v31[1], afpvec4(0.f))); - v32[0] = mix(v32[0], v32[0] * afp(slope), lessThan(v32[0], afpvec4(0.f))); - v32[1] = mix(v32[1], v32[1] * afp(slope), lessThan(v32[1], afpvec4(0.f))); - v33[0] = mix(v33[0], v33[0] * 
afp(slope), lessThan(v33[0], afpvec4(0.f))); - v33[1] = mix(v33[1], v33[1] * afp(slope), lessThan(v33[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - v00[0] = clamp(v00[0], const_min, const_max); - v00[1] = clamp(v00[1], const_min, const_max); - v01[0] = clamp(v01[0], const_min, const_max); - v01[1] = clamp(v01[1], const_min, const_max); - v02[0] = clamp(v02[0], const_min, const_max); - v02[1] = clamp(v02[1], const_min, const_max); - v03[0] = clamp(v03[0], const_min, const_max); - v03[1] = clamp(v03[1], const_min, const_max); - v10[0] = clamp(v10[0], const_min, const_max); - v10[1] = clamp(v10[1], const_min, const_max); - v11[0] = clamp(v11[0], const_min, const_max); - v11[1] = clamp(v11[1], const_min, const_max); - v12[0] = clamp(v12[0], const_min, const_max); - v12[1] = clamp(v12[1], const_min, const_max); - v13[0] = clamp(v13[0], const_min, const_max); - v13[1] = clamp(v13[1], const_min, const_max); - v20[0] = clamp(v20[0], const_min, const_max); - v20[1] = clamp(v20[1], const_min, const_max); - v21[0] = clamp(v21[0], const_min, const_max); - v21[1] = clamp(v21[1], const_min, const_max); - v22[0] = clamp(v22[0], const_min, const_max); - v22[1] = clamp(v22[1], const_min, const_max); - v23[0] = clamp(v23[0], const_min, const_max); - v23[1] = clamp(v23[1], const_min, const_max); - v30[0] = clamp(v30[0], const_min, const_max); - v30[1] = clamp(v30[1], const_min, const_max); - v31[0] = clamp(v31[0], const_min, const_max); - v31[1] = clamp(v31[1], const_min, const_max); - v32[0] = clamp(v32[0], const_min, const_max); - v32[1] = clamp(v32[1], const_min, const_max); - v33[0] = clamp(v33[0], const_min, const_max); - v33[1] = clamp(v33[1], const_min, const_max); - } - if (activation_type == 4) - { - v00[0] = afp(1.f) / (afp(1.f) + exp(-v00[0])); - v00[1] = afp(1.f) / (afp(1.f) + exp(-v00[1])); - v01[0] = afp(1.f) / (afp(1.f) + exp(-v01[0])); - v01[1] = afp(1.f) / 
(afp(1.f) + exp(-v01[1])); - v02[0] = afp(1.f) / (afp(1.f) + exp(-v02[0])); - v02[1] = afp(1.f) / (afp(1.f) + exp(-v02[1])); - v03[0] = afp(1.f) / (afp(1.f) + exp(-v03[0])); - v03[1] = afp(1.f) / (afp(1.f) + exp(-v03[1])); - v10[0] = afp(1.f) / (afp(1.f) + exp(-v10[0])); - v10[1] = afp(1.f) / (afp(1.f) + exp(-v10[1])); - v11[0] = afp(1.f) / (afp(1.f) + exp(-v11[0])); - v11[1] = afp(1.f) / (afp(1.f) + exp(-v11[1])); - v12[0] = afp(1.f) / (afp(1.f) + exp(-v12[0])); - v12[1] = afp(1.f) / (afp(1.f) + exp(-v12[1])); - v13[0] = afp(1.f) / (afp(1.f) + exp(-v13[0])); - v13[1] = afp(1.f) / (afp(1.f) + exp(-v13[1])); - v20[0] = afp(1.f) / (afp(1.f) + exp(-v20[0])); - v20[1] = afp(1.f) / (afp(1.f) + exp(-v20[1])); - v21[0] = afp(1.f) / (afp(1.f) + exp(-v21[0])); - v21[1] = afp(1.f) / (afp(1.f) + exp(-v21[1])); - v22[0] = afp(1.f) / (afp(1.f) + exp(-v22[0])); - v22[1] = afp(1.f) / (afp(1.f) + exp(-v22[1])); - v23[0] = afp(1.f) / (afp(1.f) + exp(-v23[0])); - v23[1] = afp(1.f) / (afp(1.f) + exp(-v23[1])); - v30[0] = afp(1.f) / (afp(1.f) + exp(-v30[0])); - v30[1] = afp(1.f) / (afp(1.f) + exp(-v30[1])); - v31[0] = afp(1.f) / (afp(1.f) + exp(-v31[0])); - v31[1] = afp(1.f) / (afp(1.f) + exp(-v31[1])); - v32[0] = afp(1.f) / (afp(1.f) + exp(-v32[0])); - v32[1] = afp(1.f) / (afp(1.f) + exp(-v32[1])); - v33[0] = afp(1.f) / (afp(1.f) + exp(-v33[0])); - v33[1] = afp(1.f) / (afp(1.f) + exp(-v33[1])); - } - if (activation_type == 5) - { - v00[0] = v00[0] * tanh(log(exp(v00[0]) + afp(1.f))); - v00[1] = v00[1] * tanh(log(exp(v00[1]) + afp(1.f))); - v01[0] = v01[0] * tanh(log(exp(v01[0]) + afp(1.f))); - v01[1] = v01[1] * tanh(log(exp(v01[1]) + afp(1.f))); - v02[0] = v02[0] * tanh(log(exp(v02[0]) + afp(1.f))); - v02[1] = v02[1] * tanh(log(exp(v02[1]) + afp(1.f))); - v03[0] = v03[0] * tanh(log(exp(v03[0]) + afp(1.f))); - v03[1] = v03[1] * tanh(log(exp(v03[1]) + afp(1.f))); - v10[0] = v10[0] * tanh(log(exp(v10[0]) + afp(1.f))); - v10[1] = v10[1] * tanh(log(exp(v10[1]) + afp(1.f))); - v11[0] = 
v11[0] * tanh(log(exp(v11[0]) + afp(1.f))); - v11[1] = v11[1] * tanh(log(exp(v11[1]) + afp(1.f))); - v12[0] = v12[0] * tanh(log(exp(v12[0]) + afp(1.f))); - v12[1] = v12[1] * tanh(log(exp(v12[1]) + afp(1.f))); - v13[0] = v13[0] * tanh(log(exp(v13[0]) + afp(1.f))); - v13[1] = v13[1] * tanh(log(exp(v13[1]) + afp(1.f))); - v20[0] = v20[0] * tanh(log(exp(v20[0]) + afp(1.f))); - v20[1] = v20[1] * tanh(log(exp(v20[1]) + afp(1.f))); - v21[0] = v21[0] * tanh(log(exp(v21[0]) + afp(1.f))); - v21[1] = v21[1] * tanh(log(exp(v21[1]) + afp(1.f))); - v22[0] = v22[0] * tanh(log(exp(v22[0]) + afp(1.f))); - v22[1] = v22[1] * tanh(log(exp(v22[1]) + afp(1.f))); - v23[0] = v23[0] * tanh(log(exp(v23[0]) + afp(1.f))); - v23[1] = v23[1] * tanh(log(exp(v23[1]) + afp(1.f))); - v30[0] = v30[0] * tanh(log(exp(v30[0]) + afp(1.f))); - v30[1] = v30[1] * tanh(log(exp(v30[1]) + afp(1.f))); - v31[0] = v31[0] * tanh(log(exp(v31[0]) + afp(1.f))); - v31[1] = v31[1] * tanh(log(exp(v31[1]) + afp(1.f))); - v32[0] = v32[0] * tanh(log(exp(v32[0]) + afp(1.f))); - v32[1] = v32[1] * tanh(log(exp(v32[1]) + afp(1.f))); - v33[0] = v33[0] * tanh(log(exp(v33[0]) + afp(1.f))); - v33[1] = v33[1] * tanh(log(exp(v33[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - v00[0] = v00[0] * clamp(v00[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v00[1] = v00[1] * clamp(v00[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v01[0] = v01[0] * clamp(v01[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v01[1] = v01[1] * clamp(v01[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v02[0] = v02[0] * clamp(v02[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v02[1] = v02[1] * clamp(v02[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v03[0] = v03[0] * clamp(v03[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v03[1] = v03[1] * clamp(v03[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v10[0] = v10[0] * clamp(v10[0] * 
afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v10[1] = v10[1] * clamp(v10[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v11[0] = v11[0] * clamp(v11[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v11[1] = v11[1] * clamp(v11[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v12[0] = v12[0] * clamp(v12[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v12[1] = v12[1] * clamp(v12[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v13[0] = v13[0] * clamp(v13[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v13[1] = v13[1] * clamp(v13[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v20[0] = v20[0] * clamp(v20[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v20[1] = v20[1] * clamp(v20[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v21[0] = v21[0] * clamp(v21[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v21[1] = v21[1] * clamp(v21[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v22[0] = v22[0] * clamp(v22[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v22[1] = v22[1] * clamp(v22[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v23[0] = v23[0] * clamp(v23[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v23[1] = v23[1] * clamp(v23[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v30[0] = v30[0] * clamp(v30[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v30[1] = v30[1] * clamp(v30[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v31[0] = v31[0] * clamp(v31[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v31[1] = v31[1] * clamp(v31[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v32[0] = v32[0] * clamp(v32[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v32[1] = v32[1] * clamp(v32[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v33[0] = v33[0] * clamp(v33[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v33[1] = v33[1] * clamp(v33[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + v00 = activation_afpvec8(v00, activation_type, activation_param_0, activation_param_1); + v01 = 
activation_afpvec8(v01, activation_type, activation_param_0, activation_param_1); + v02 = activation_afpvec8(v02, activation_type, activation_param_0, activation_param_1); + v03 = activation_afpvec8(v03, activation_type, activation_param_0, activation_param_1); + v10 = activation_afpvec8(v10, activation_type, activation_param_0, activation_param_1); + v11 = activation_afpvec8(v11, activation_type, activation_param_0, activation_param_1); + v12 = activation_afpvec8(v12, activation_type, activation_param_0, activation_param_1); + v13 = activation_afpvec8(v13, activation_type, activation_param_0, activation_param_1); + v20 = activation_afpvec8(v20, activation_type, activation_param_0, activation_param_1); + v21 = activation_afpvec8(v21, activation_type, activation_param_0, activation_param_1); + v22 = activation_afpvec8(v22, activation_type, activation_param_0, activation_param_1); + v23 = activation_afpvec8(v23, activation_type, activation_param_0, activation_param_1); + v30 = activation_afpvec8(v30, activation_type, activation_param_0, activation_param_1); + v31 = activation_afpvec8(v31, activation_type, activation_param_0, activation_param_1); + v32 = activation_afpvec8(v32, activation_type, activation_param_0, activation_param_1); + v33 = activation_afpvec8(v33, activation_type, activation_param_0, activation_param_1); // store 4x4 int x = gx * 4; diff --git a/src/layer/vulkan/shader/convolution_pack8_gemm.comp b/src/layer/vulkan/shader/convolution_pack8_gemm.comp index be7116c3b..1be766b71 100644 --- a/src/layer/vulkan/shader/convolution_pack8_gemm.comp +++ b/src/layer/vulkan/shader/convolution_pack8_gemm.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int 
dilation_w = 1; @@ -245,77 +248,10 @@ void main() } #endif - if (activation_type == 1) - { - sum0[0] = max(sum0[0], afp(0.f)); - sum0[1] = max(sum0[1], afp(0.f)); - sum1[0] = max(sum1[0], afp(0.f)); - sum1[1] = max(sum1[1], afp(0.f)); - sum2[0] = max(sum2[0], afp(0.f)); - sum2[1] = max(sum2[1], afp(0.f)); - sum3[0] = max(sum3[0], afp(0.f)); - sum3[1] = max(sum3[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); - sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); - sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); - sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); - sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); - sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); - sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); - sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0[0] = clamp(sum0[0], const_min, const_max); - sum0[1] = clamp(sum0[1], const_min, const_max); - sum1[0] = clamp(sum1[0], const_min, const_max); - sum1[1] = clamp(sum1[1], const_min, const_max); - sum2[0] = clamp(sum2[0], const_min, const_max); - sum2[1] = clamp(sum2[1], const_min, const_max); - sum3[0] = clamp(sum3[0], const_min, const_max); - sum3[1] = clamp(sum3[1], const_min, const_max); - } - if (activation_type == 4) - { - sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); - sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); - sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); - sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); - sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); - sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); - sum3[0] = 
afp(1.f) / (afp(1.f) + exp(-sum3[0])); - sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); - } - if (activation_type == 5) - { - sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); - sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); - sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); - sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); - sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); - sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); - sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); - sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack8to1.comp b/src/layer/vulkan/shader/convolution_pack8to1.comp index c1295e939..ea648c862 100644 --- 
a/src/layer/vulkan/shader/convolution_pack8to1.comp +++ b/src/layer/vulkan/shader/convolution_pack8to1.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -198,77 +201,14 @@ void main() } #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - sum4 = max(sum4, afp(0.f)); - sum5 = max(sum5, afp(0.f)); - sum6 = max(sum6, afp(0.f)); - sum7 = max(sum7, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; - sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; - sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; - sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; - sum4 = sum4 < afp(0.f) ? sum4 * slope : sum4; - sum5 = sum5 < afp(0.f) ? sum5 * slope : sum5; - sum6 = sum6 < afp(0.f) ? sum6 * slope : sum6; - sum7 = sum7 < afp(0.f) ? 
sum7 * slope : sum7; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - sum4 = clamp(sum4, const_min, const_max); - sum5 = clamp(sum5, const_min, const_max); - sum6 = clamp(sum6, const_min, const_max); - sum7 = clamp(sum7, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); - sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); - sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); - sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); - sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); - sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); - sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7 = sum7 * 
clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); + sum4 = activation_afp(sum4, activation_type, activation_param_0, activation_param_1); + sum5 = activation_afp(sum5, activation_type, activation_param_0, activation_param_1); + sum6 = activation_afp(sum6, activation_type, activation_param_0, activation_param_1); + sum7 = activation_afp(sum7, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack8to1_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack8to1_1x1s1d1.comp index f26c4d8ca..d1fa1ed7f 100644 --- a/src/layer/vulkan/shader/convolution_pack8to1_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack8to1_1x1s1d1.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -146,53 +149,10 @@ void main() } #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; - sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; - sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; - sum3 = sum3 < afp(0.f) ? 
sum3 * slope : sum3; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack8to1_gemm.comp b/src/layer/vulkan/shader/convolution_pack8to1_gemm.comp index 465cd5ce9..0fd9b0aca 100644 --- a/src/layer/vulkan/shader/convolution_pack8to1_gemm.comp +++ b/src/layer/vulkan/shader/convolution_pack8to1_gemm.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension 
GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -169,53 +172,10 @@ void main() } #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; - sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; - sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; - sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afp(sum0, activation_type, 
activation_param_0, activation_param_1); + sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack8to4.comp b/src/layer/vulkan/shader/convolution_pack8to4.comp index b2931677e..90a05b05f 100644 --- a/src/layer/vulkan/shader/convolution_pack8to4.comp +++ b/src/layer/vulkan/shader/convolution_pack8to4.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -258,77 +261,14 @@ void main() } #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - sum4 = max(sum4, afp(0.f)); - sum5 = max(sum5, afp(0.f)); - sum6 = max(sum6, afp(0.f)); - sum7 = max(sum7, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - sum4 = mix(sum4, sum4 * afp(slope), lessThan(sum4, afpvec4(0.f))); - sum5 = mix(sum5, sum5 * afp(slope), lessThan(sum5, afpvec4(0.f))); - sum6 = mix(sum6, sum6 * afp(slope), lessThan(sum6, afpvec4(0.f))); - sum7 = mix(sum7, sum7 * afp(slope), lessThan(sum7, afpvec4(0.f))); - } - if (activation_type == 3) - 
{ - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - sum4 = clamp(sum4, const_min, const_max); - sum5 = clamp(sum5, const_min, const_max); - sum6 = clamp(sum6, const_min, const_max); - sum7 = clamp(sum7, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); - sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); - sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); - sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); - sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); - sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); - sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum7 = sum7 * clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + 
sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); + sum4 = activation_afpvec4(sum4, activation_type, activation_param_0, activation_param_1); + sum5 = activation_afpvec4(sum5, activation_type, activation_param_0, activation_param_1); + sum6 = activation_afpvec4(sum6, activation_type, activation_param_0, activation_param_1); + sum7 = activation_afpvec4(sum7, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack8to4_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack8to4_1x1s1d1.comp index 0803a9fd5..513d51a5e 100644 --- a/src/layer/vulkan/shader/convolution_pack8to4_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack8to4_1x1s1d1.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -182,53 +185,10 @@ void main() } #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = 
mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolution_pack8to4_gemm.comp b/src/layer/vulkan/shader/convolution_pack8to4_gemm.comp index 062ee7408..6860ee5bd 100644 --- a/src/layer/vulkan/shader/convolution_pack8to4_gemm.comp +++ b/src/layer/vulkan/shader/convolution_pack8to4_gemm.comp @@ -22,6 +22,9 @@ struct 
sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -205,53 +208,10 @@ void main() } #endif - if (activation_type == 1) - { - sum0 = max(sum0, afp(0.f)); - sum1 = max(sum1, afp(0.f)); - sum2 = max(sum2, afp(0.f)); - sum3 = max(sum3, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); - sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); - sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); - sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum0 = clamp(sum0, const_min, const_max); - sum1 = clamp(sum1, const_min, const_max); - sum2 = clamp(sum2, const_min, const_max); - sum3 = clamp(sum3, const_min, const_max); - } - if (activation_type == 4) - { - sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); - sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); - sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); - sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); - } - if (activation_type == 5) - { - sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); - sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); - sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); - sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), 
afp(1.f)); - sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); + sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); + sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); + sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); diff --git a/src/layer/vulkan/shader/convolutiondepthwise.comp b/src/layer/vulkan/shader/convolutiondepthwise.comp index b887fabbe..ade54eb47 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -132,35 +135,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? 
sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group.comp b/src/layer/vulkan/shader/convolutiondepthwise_group.comp index 7094ce48a..9393d7dd9 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -152,35 +155,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? 
sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp index 51b5fdc48..3169dc299 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -160,35 +163,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = 
afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp index 76a00c018..92ff324ef 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -165,41 +168,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = 
afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp index e8b690d1b..727534b33 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -180,35 +183,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git 
a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp index 857ca2234..2a1e12ffb 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -160,35 +163,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp index 1ca917d55..9a31a21d2 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension 
GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -191,41 +194,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp index cc006d457..d9cf03650 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 
efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -191,41 +194,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp index 1eddb2996..5bbef0720 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp @@ -22,6 +22,9 @@ struct 
sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -163,35 +166,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp index 7d89d7964..f7b3d9d83 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -175,35 +178,7 @@ void main() } 
#endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp b/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp index 2cbd977a1..f41aa7842 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -140,35 +143,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / 
(afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp b/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp index e3ccfdfc0..77fcc8141 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -145,41 +148,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + 
afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution.comp b/src/layer/vulkan/shader/deconvolution.comp index c48034156..b5ebf4265 100644 --- a/src/layer/vulkan/shader/deconvolution.comp +++ b/src/layer/vulkan/shader/deconvolution.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -165,25 +168,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? 
sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_col2im.comp b/src/layer/vulkan/shader/deconvolution_col2im.comp index d65a5dc0e..790765d86 100644 --- a/src/layer/vulkan/shader/deconvolution_col2im.comp +++ b/src/layer/vulkan/shader/deconvolution_col2im.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -120,25 +123,7 @@ void main() } } - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? 
sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_pack1to4.comp b/src/layer/vulkan/shader/deconvolution_pack1to4.comp index 0a56820d3..932cd3615 100644 --- a/src/layer/vulkan/shader/deconvolution_pack1to4.comp +++ b/src/layer/vulkan/shader/deconvolution_pack1to4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -173,25 +176,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_pack1to8.comp b/src/layer/vulkan/shader/deconvolution_pack1to8.comp index f6227cd34..c6ca9cec3 100644 --- a/src/layer/vulkan/shader/deconvolution_pack1to8.comp +++ b/src/layer/vulkan/shader/deconvolution_pack1to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { 
f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -178,29 +181,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_pack4.comp b/src/layer/vulkan/shader/deconvolution_pack4.comp index baed6081b..f7ee538b8 100644 --- a/src/layer/vulkan/shader/deconvolution_pack4.comp +++ b/src/layer/vulkan/shader/deconvolution_pack4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -193,25 +196,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = 
afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_pack4_col2im.comp b/src/layer/vulkan/shader/deconvolution_pack4_col2im.comp index 8ef5e1678..9b548fe43 100644 --- a/src/layer/vulkan/shader/deconvolution_pack4_col2im.comp +++ b/src/layer/vulkan/shader/deconvolution_pack4_col2im.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -120,25 +123,7 @@ void main() } } - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_pack4to1.comp b/src/layer/vulkan/shader/deconvolution_pack4to1.comp index 32bf34dd2..c8ecfe90b 100644 --- a/src/layer/vulkan/shader/deconvolution_pack4to1.comp +++ 
b/src/layer/vulkan/shader/deconvolution_pack4to1.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -173,25 +176,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_pack4to8.comp b/src/layer/vulkan/shader/deconvolution_pack4to8.comp index a1eaf71a1..a96428299 100644 --- a/src/layer/vulkan/shader/deconvolution_pack4to8.comp +++ b/src/layer/vulkan/shader/deconvolution_pack4to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -204,29 +207,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * 
afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_pack8.comp b/src/layer/vulkan/shader/deconvolution_pack8.comp index 5e042f57d..f120fa9c1 100644 --- a/src/layer/vulkan/shader/deconvolution_pack8.comp +++ b/src/layer/vulkan/shader/deconvolution_pack8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -204,29 +207,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } + sum = activation_afpvec8(sum, activation_type, 
activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_pack8_col2im.comp b/src/layer/vulkan/shader/deconvolution_pack8_col2im.comp index f395f81ec..e53a49323 100644 --- a/src/layer/vulkan/shader/deconvolution_pack8_col2im.comp +++ b/src/layer/vulkan/shader/deconvolution_pack8_col2im.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -121,29 +124,7 @@ void main() } } - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_pack8to1.comp b/src/layer/vulkan/shader/deconvolution_pack8to1.comp index 64e8913c8..8e8f18fad 100644 --- a/src/layer/vulkan/shader/deconvolution_pack8to1.comp +++ b/src/layer/vulkan/shader/deconvolution_pack8to1.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 
efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -176,25 +179,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolution_pack8to4.comp b/src/layer/vulkan/shader/deconvolution_pack8to4.comp index 2039748ec..1556d9fd6 100644 --- a/src/layer/vulkan/shader/deconvolution_pack8to4.comp +++ b/src/layer/vulkan/shader/deconvolution_pack8to4.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -188,25 +191,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = 
clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise.comp b/src/layer/vulkan/shader/deconvolutiondepthwise.comp index 0aebdf2ad..f5be10e15 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -159,25 +162,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? 
sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group.comp index 4b2483fa0..1402cd2c8 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -177,25 +180,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? 
sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to4.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to4.comp index abdf31067..a40451eec 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to4.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -185,25 +188,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to8.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to8.comp index ee20f73c9..53e309aaa 100644 --- 
a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to8.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -190,29 +193,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4.comp index c345fac9b..4f287ae65 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int 
kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -205,25 +208,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to1.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to1.comp index 2b76617da..f131aeefb 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to1.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to1.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -185,25 +188,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? 
sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to8.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to8.comp index 8956b2216..723bc74a7 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to8.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -216,29 +219,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader 
image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8.comp index 884ca96f8..d32662a08 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -216,29 +219,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to1.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to1.comp index f68cd94a7..6fa3ffa14 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to1.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to1.comp @@ -22,6 +22,9 @@ struct 
sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -188,25 +191,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to4.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to4.comp index 3df4ab3d4..89908e0cb 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to4.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to4.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -200,25 +203,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp 
const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_pack4.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_pack4.comp index 4ae56eff3..b363329a9 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_pack4.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_pack4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -167,25 +170,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_pack8.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_pack8.comp index c90f3d6c3..9d1ac5655 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_pack8.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_pack8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; 
f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int kernel_w = 1; layout (constant_id = 1) const int kernel_h = 1; layout (constant_id = 2) const int dilation_w = 1; @@ -172,29 +175,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, gy, gz), sum); diff --git a/src/layer/vulkan/shader/innerproduct.comp b/src/layer/vulkan/shader/innerproduct.comp index e4af6796e..467e1c14b 100644 --- a/src/layer/vulkan/shader/innerproduct.comp +++ b/src/layer/vulkan/shader/innerproduct.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -104,35 +107,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum 
< afp(0.f) ? sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_gemm.comp b/src/layer/vulkan/shader/innerproduct_gemm.comp index b3701824c..058ee6b5b 100644 --- a/src/layer/vulkan/shader/innerproduct_gemm.comp +++ b/src/layer/vulkan/shader/innerproduct_gemm.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -105,35 +108,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? 
sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_gemm_wp1to4.comp b/src/layer/vulkan/shader/innerproduct_gemm_wp1to4.comp index de88bf604..ab0b304d8 100644 --- a/src/layer/vulkan/shader/innerproduct_gemm_wp1to4.comp +++ b/src/layer/vulkan/shader/innerproduct_gemm_wp1to4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -113,35 +116,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - 
const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx * 4 + 0, gy, 0), sum.r); diff --git a/src/layer/vulkan/shader/innerproduct_gemm_wp1to8.comp b/src/layer/vulkan/shader/innerproduct_gemm_wp1to8.comp index 524be6e7d..5bda8f6b8 100644 --- a/src/layer/vulkan/shader/innerproduct_gemm_wp1to8.comp +++ b/src/layer/vulkan/shader/innerproduct_gemm_wp1to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -118,41 +121,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] 
* afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx * 8 + 0, gy, 0), sum[0].r); diff --git a/src/layer/vulkan/shader/innerproduct_gemm_wp4.comp b/src/layer/vulkan/shader/innerproduct_gemm_wp4.comp index 0be080e10..682368877 100644 --- a/src/layer/vulkan/shader/innerproduct_gemm_wp4.comp +++ b/src/layer/vulkan/shader/innerproduct_gemm_wp4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -141,35 +144,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx * 4 + 0, gy, 0), sum.r); diff --git a/src/layer/vulkan/shader/innerproduct_gemm_wp4to1.comp b/src/layer/vulkan/shader/innerproduct_gemm_wp4to1.comp index 
c5d9a0308..b7a668f8d 100644 --- a/src/layer/vulkan/shader/innerproduct_gemm_wp4to1.comp +++ b/src/layer/vulkan/shader/innerproduct_gemm_wp4to1.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -121,35 +124,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_gemm_wp4to8.comp b/src/layer/vulkan/shader/innerproduct_gemm_wp4to8.comp index a14678d06..2de98e6a1 100644 --- a/src/layer/vulkan/shader/innerproduct_gemm_wp4to8.comp +++ b/src/layer/vulkan/shader/innerproduct_gemm_wp4to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int 
activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -152,41 +155,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx * 8 + 0, gy, 0), sum[0].r); diff --git a/src/layer/vulkan/shader/innerproduct_gemm_wp8.comp b/src/layer/vulkan/shader/innerproduct_gemm_wp8.comp index 5d01dee7d..a75177db5 100644 --- a/src/layer/vulkan/shader/innerproduct_gemm_wp8.comp +++ b/src/layer/vulkan/shader/innerproduct_gemm_wp8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) 
const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -160,41 +163,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx * 8 + 0, gy, 0), sum[0].r); diff --git a/src/layer/vulkan/shader/innerproduct_gemm_wp8to1.comp b/src/layer/vulkan/shader/innerproduct_gemm_wp8to1.comp index 30ac3e14a..001e93237 100644 --- a/src/layer/vulkan/shader/innerproduct_gemm_wp8to1.comp +++ b/src/layer/vulkan/shader/innerproduct_gemm_wp8to1.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout 
(constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -132,35 +135,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, gy, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_gemm_wp8to4.comp b/src/layer/vulkan/shader/innerproduct_gemm_wp8to4.comp index 8093568fc..3c263cd0e 100644 --- a/src/layer/vulkan/shader/innerproduct_gemm_wp8to4.comp +++ b/src/layer/vulkan/shader/innerproduct_gemm_wp8to4.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -144,35 +147,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp 
const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx * 4 + 0, gy, 0), sum.r); diff --git a/src/layer/vulkan/shader/innerproduct_pack1to4.comp b/src/layer/vulkan/shader/innerproduct_pack1to4.comp index 24b3c7743..a318171e4 100644 --- a/src/layer/vulkan/shader/innerproduct_pack1to4.comp +++ b/src/layer/vulkan/shader/innerproduct_pack1to4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -112,35 +115,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * 
clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_pack1to8.comp b/src/layer/vulkan/shader/innerproduct_pack1to8.comp index a3a6e973c..c86d406c8 100644 --- a/src/layer/vulkan/shader/innerproduct_pack1to8.comp +++ b/src/layer/vulkan/shader/innerproduct_pack1to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -119,41 +122,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * 
clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_pack4.comp b/src/layer/vulkan/shader/innerproduct_pack4.comp index b7ffcc59f..ac11c88e9 100644 --- a/src/layer/vulkan/shader/innerproduct_pack4.comp +++ b/src/layer/vulkan/shader/innerproduct_pack4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -135,35 +138,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_pack4to1.comp b/src/layer/vulkan/shader/innerproduct_pack4to1.comp index 2ec2459cf..73cbf9918 100644 --- a/src/layer/vulkan/shader/innerproduct_pack4to1.comp +++ 
b/src/layer/vulkan/shader/innerproduct_pack4to1.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -112,35 +115,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_pack4to8.comp b/src/layer/vulkan/shader/innerproduct_pack4to8.comp index c4a16843d..61daf568b 100644 --- a/src/layer/vulkan/shader/innerproduct_pack4to8.comp +++ b/src/layer/vulkan/shader/innerproduct_pack4to8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -147,41 +150,7 @@ void 
main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_pack8.comp b/src/layer/vulkan/shader/innerproduct_pack8.comp index 293feed9b..ce2ac1a19 100644 --- a/src/layer/vulkan/shader/innerproduct_pack8.comp +++ b/src/layer/vulkan/shader/innerproduct_pack8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -147,41 +150,7 @@ void main() } #endif - if 
(activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_pack8to1.comp b/src/layer/vulkan/shader/innerproduct_pack8to1.comp index e0c0ae5ab..831795c9e 100644 --- a/src/layer/vulkan/shader/innerproduct_pack8to1.comp +++ b/src/layer/vulkan/shader/innerproduct_pack8to1.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -115,35 +118,7 @@ void main() } #endif - if 
(activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_pack8to4.comp b/src/layer/vulkan/shader/innerproduct_pack8to4.comp index 8cc190949..853c79085 100644 --- a/src/layer/vulkan/shader/innerproduct_pack8to4.comp +++ b/src/layer/vulkan/shader/innerproduct_pack8to4.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -131,35 +134,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = 
afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_reduce_sum8.comp b/src/layer/vulkan/shader/innerproduct_reduce_sum8.comp index 8d0a877f9..cb16011d2 100644 --- a/src/layer/vulkan/shader/innerproduct_reduce_sum8.comp +++ b/src/layer/vulkan/shader/innerproduct_reduce_sum8.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -88,35 +91,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = sum < afp(0.f) ? 
sum * slope : sum; - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st1(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_reduce_sum8_pack4.comp b/src/layer/vulkan/shader/innerproduct_reduce_sum8_pack4.comp index 55b38031c..bbc343bf2 100644 --- a/src/layer/vulkan/shader/innerproduct_reduce_sum8_pack4.comp +++ b/src/layer/vulkan/shader/innerproduct_reduce_sum8_pack4.comp @@ -21,6 +21,9 @@ #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -88,35 +91,7 @@ void main() } #endif - if (activation_type == 1) - { - sum = max(sum, afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum = clamp(sum, const_min, const_max); - } - if (activation_type == 4) - { - sum = afp(1.f) / (afp(1.f) + exp(-sum)); - } - if (activation_type == 5) - { - sum = sum * tanh(log(exp(sum) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = 
afp(activation_param_0); - const afp beta = afp(activation_param_1); - sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st4(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/innerproduct_reduce_sum8_pack8.comp b/src/layer/vulkan/shader/innerproduct_reduce_sum8_pack8.comp index ae44fc835..51eea7a2a 100644 --- a/src/layer/vulkan/shader/innerproduct_reduce_sum8_pack8.comp +++ b/src/layer/vulkan/shader/innerproduct_reduce_sum8_pack8.comp @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #extension GL_EXT_shader_explicit_arithmetic_types_float16: require #endif +#extension GL_GOOGLE_include_directive: enable +#include "vulkan_activation.comp" + layout (constant_id = 0) const int bias_term = 0; layout (constant_id = 1) const int activation_type = 0; layout (constant_id = 2) const float activation_param_0 = 0; @@ -89,41 +92,7 @@ void main() } #endif - if (activation_type == 1) - { - sum[0] = max(sum[0], afp(0.f)); - sum[1] = max(sum[1], afp(0.f)); - } - if (activation_type == 2) - { - const afp slope = afp(activation_param_0); - sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); - sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); - } - if (activation_type == 3) - { - const afp const_min = afp(activation_param_0); - const afp const_max = afp(activation_param_1); - sum[0] = clamp(sum[0], const_min, const_max); - sum[1] = clamp(sum[1], const_min, const_max); - } - if (activation_type == 4) - { - sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); - sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); - } - if (activation_type == 5) - { - sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); - sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); - } - if (activation_type == 6) - { - const afp alpha = afp(activation_param_0); - const afp beta = 
afp(activation_param_1); - sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - } + sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); #if NCNN_image_shader image3d_st8(top_blob, ivec3(gx, 0, 0), sum); diff --git a/src/layer/vulkan/shader/mish.comp b/src/layer/vulkan/shader/mish.comp index ffcceeb47..c6599d94a 100644 --- a/src/layer/vulkan/shader/mish.comp +++ b/src/layer/vulkan/shader/mish.comp @@ -61,7 +61,11 @@ void main() afp v = buffer_ld1(bottom_top_blob_data, gi); #endif +#if NCNN_moltenvk + v = v * afp(tanh(float(log(exp(v) + afp(1.f))))); +#else v = v * tanh(log(exp(v) + afp(1.f))); +#endif #if NCNN_image_shader image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); diff --git a/src/layer/vulkan/shader/mish_pack4.comp b/src/layer/vulkan/shader/mish_pack4.comp index eea35afed..066060278 100644 --- a/src/layer/vulkan/shader/mish_pack4.comp +++ b/src/layer/vulkan/shader/mish_pack4.comp @@ -61,7 +61,11 @@ void main() afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); #endif +#if NCNN_moltenvk + v = v * afpvec4(tanh(vec4(log(exp(v) + afpvec4(1.f))))); +#else v = v * tanh(log(exp(v) + afpvec4(1.f))); +#endif #if NCNN_image_shader image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); diff --git a/src/layer/vulkan/shader/mish_pack8.comp b/src/layer/vulkan/shader/mish_pack8.comp index e94d4542c..0da7c84f6 100644 --- a/src/layer/vulkan/shader/mish_pack8.comp +++ b/src/layer/vulkan/shader/mish_pack8.comp @@ -62,8 +62,13 @@ void main() afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); #endif +#if NCNN_moltenvk + v[0] = v[0] * afpvec4(tanh(vec4(log(exp(v[0]) + afpvec4(1.f))))); + v[1] = v[1] * afpvec4(tanh(vec4(log(exp(v[1]) + afpvec4(1.f))))); +#else v[0] = v[0] * tanh(log(exp(v[0]) + afpvec4(1.f))); v[1] = v[1] * tanh(log(exp(v[1]) + afpvec4(1.f))); +#endif #if NCNN_image_shader image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); diff --git 
a/src/layer/vulkan/shader/tanh.comp b/src/layer/vulkan/shader/tanh.comp index 46f1e5543..c151f8679 100644 --- a/src/layer/vulkan/shader/tanh.comp +++ b/src/layer/vulkan/shader/tanh.comp @@ -61,7 +61,11 @@ void main() afp v = buffer_ld1(bottom_top_blob_data, gi); #endif +#if NCNN_moltenvk + v = afp(tanh(float(v))); +#else v = tanh(v); +#endif #if NCNN_image_shader image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); diff --git a/src/layer/vulkan/shader/tanh_pack4.comp b/src/layer/vulkan/shader/tanh_pack4.comp index 732d70016..63a9c45c0 100644 --- a/src/layer/vulkan/shader/tanh_pack4.comp +++ b/src/layer/vulkan/shader/tanh_pack4.comp @@ -61,7 +61,11 @@ void main() afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); #endif +#if NCNN_moltenvk + v = afpvec4(tanh(vec4(v))); +#else v = tanh(v); +#endif #if NCNN_image_shader image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); diff --git a/src/layer/vulkan/shader/tanh_pack8.comp b/src/layer/vulkan/shader/tanh_pack8.comp index 382c6a1ff..1de55bb74 100644 --- a/src/layer/vulkan/shader/tanh_pack8.comp +++ b/src/layer/vulkan/shader/tanh_pack8.comp @@ -62,8 +62,13 @@ void main() afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); #endif +#if NCNN_moltenvk + v[0] = afpvec4(tanh(vec4(v[0]))); + v[1] = afpvec4(tanh(vec4(v[1]))); +#else v[0] = tanh(v[0]); v[1] = tanh(v[1]); +#endif #if NCNN_image_shader image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); diff --git a/src/layer/vulkan/shader/unaryop.comp b/src/layer/vulkan/shader/unaryop.comp index 44e0c544a..9bee389c6 100644 --- a/src/layer/vulkan/shader/unaryop.comp +++ b/src/layer/vulkan/shader/unaryop.comp @@ -81,7 +81,11 @@ void main() if (op_type == 13) res = acos(v); if (op_type == 14) res = atan(v); if (op_type == 15) res = afp(1.f) / v; +#if NCNN_moltenvk + if (op_type == 16) res = afp(tanh(float(v))); +#else if (op_type == 16) res = tanh(v); +#endif #if NCNN_image_shader image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); diff --git a/src/layer/vulkan/shader/unaryop_pack4.comp 
b/src/layer/vulkan/shader/unaryop_pack4.comp index 1d6e71c22..f04c81618 100644 --- a/src/layer/vulkan/shader/unaryop_pack4.comp +++ b/src/layer/vulkan/shader/unaryop_pack4.comp @@ -81,7 +81,11 @@ void main() if (op_type == 13) res = acos(v); if (op_type == 14) res = atan(v); if (op_type == 15) res = afp(1.f) / v; +#if NCNN_moltenvk + if (op_type == 16) res = afpvec4(tanh(vec4(v))); +#else if (op_type == 16) res = tanh(v); +#endif #if NCNN_image_shader image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); diff --git a/src/layer/vulkan/shader/unaryop_pack8.comp b/src/layer/vulkan/shader/unaryop_pack8.comp index d888ea2b2..ebca872f7 100644 --- a/src/layer/vulkan/shader/unaryop_pack8.comp +++ b/src/layer/vulkan/shader/unaryop_pack8.comp @@ -148,8 +148,13 @@ void main() } if (op_type == 16) { +#if NCNN_moltenvk + res[0] = afpvec4(tanh(vec4(v[0]))); + res[1] = afpvec4(tanh(vec4(v[1]))); +#else res[0] = tanh(v[0]); res[1] = tanh(v[1]); +#endif } #if NCNN_image_shader diff --git a/src/layer/vulkan/shader/vulkan_activation.comp b/src/layer/vulkan/shader/vulkan_activation.comp new file mode 100644 index 000000000..735be9bec --- /dev/null +++ b/src/layer/vulkan/shader/vulkan_activation.comp @@ -0,0 +1,142 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// Shared activation helpers for the Vulkan compute shaders; shaders pull this in with #include "vulkan_activation.comp".
+#ifndef NCNN_VULKAN_ACTIVATION_COMP
+#define NCNN_VULKAN_ACTIVATION_COMP
+// Scalar variant: returns v after the activation selected by activation_type; any type other than 1..6 returns v unchanged.
+afp activation_afp(afp v, int activation_type, float activation_param_0, float activation_param_1)
+{
+    if (activation_type == 1) // ReLU: max(v, 0)
+    {
+        v = max(v, afp(0.f));
+    }
+    if (activation_type == 2) // leaky ReLU: negative inputs are scaled by activation_param_0
+    {
+        const afp slope = afp(activation_param_0);
+        v = v < afp(0.f) ? v * slope : v;
+    }
+    if (activation_type == 3) // clip to [activation_param_0, activation_param_1]
+    {
+        const afp const_min = afp(activation_param_0);
+        const afp const_max = afp(activation_param_1);
+        v = clamp(v, const_min, const_max);
+    }
+    if (activation_type == 4) // sigmoid: 1 / (1 + exp(-v))
+    {
+        v = afp(1.f) / (afp(1.f) + exp(-v));
+    }
+    if (activation_type == 5) // mish: v * tanh(log(exp(v) + 1))
+    {
+#if NCNN_moltenvk
+        v = v * afp(tanh(float(log(exp(v) + afp(1.f))))); // tanh is evaluated on a float and cast back to afp; NOTE(review): presumably a MoltenVK workaround - confirm
+#else
+        v = v * tanh(log(exp(v) + afp(1.f)));
+#endif
+    }
+    if (activation_type == 6) // hard-swish form: v * clamp(v * alpha + beta, 0, 1), alpha = activation_param_0, beta = activation_param_1
+    {
+        const afp alpha = afp(activation_param_0);
+        const afp beta = afp(activation_param_1);
+        v = v * clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f));
+    }
+
+    return v;
+}
+// afpvec4 variant: the same six activations, computed with component-wise built-ins on the whole vector.
+afpvec4 activation_afpvec4(afpvec4 v, int activation_type, float activation_param_0, float activation_param_1)
+{
+    if (activation_type == 1) // ReLU: max(v, 0)
+    {
+        v = max(v, afp(0.f));
+    }
+    if (activation_type == 2) // leaky ReLU: mix() picks v * slope for the components where v < 0
+    {
+        const afp slope = afp(activation_param_0);
+        v = mix(v, v * afp(slope), lessThan(v, afpvec4(0.f)));
+    }
+    if (activation_type == 3) // clip to [activation_param_0, activation_param_1]
+    {
+        const afp const_min = afp(activation_param_0);
+        const afp const_max = afp(activation_param_1);
+        v = clamp(v, const_min, const_max);
+    }
+    if (activation_type == 4) // sigmoid: 1 / (1 + exp(-v))
+    {
+        v = afp(1.f) / (afp(1.f) + exp(-v));
+    }
+    if (activation_type == 5) // mish: v * tanh(log(exp(v) + 1))
+    {
+#if NCNN_moltenvk
+        v = v * afpvec4(tanh(vec4(log(exp(v) + afp(1.f))))); // tanh is evaluated on a vec4 and cast back to afpvec4
+#else
+        v = v * tanh(log(exp(v) + afp(1.f)));
+#endif
+    }
+    if (activation_type == 6) // hard-swish form: v * clamp(v * alpha + beta, 0, 1)
+    {
+        const afp alpha = afp(activation_param_0);
+        const afp beta = afp(activation_param_1);
+        v = v * clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f));
+    }
+
+    return v;
+}
+// afpvec8 variant: the same six activations, applied to v[0] and v[1] separately.
+afpvec8 activation_afpvec8(afpvec8 v, int activation_type, float activation_param_0, float activation_param_1)
+{
+    if (activation_type == 1) // ReLU: max(v, 0)
+    {
+        v[0] = max(v[0], afp(0.f));
+        v[1] = max(v[1], afp(0.f));
+    }
+    if (activation_type == 2) // leaky ReLU: mix() picks v * slope for the components where v < 0
+    {
+        const afp slope = afp(activation_param_0);
+        v[0] = mix(v[0], v[0] * afp(slope), lessThan(v[0], afpvec4(0.f)));
+        v[1] = mix(v[1], v[1] * afp(slope), lessThan(v[1], afpvec4(0.f)));
+    }
+    if (activation_type == 3) // clip to [activation_param_0, activation_param_1]
+    {
+        const afp const_min = afp(activation_param_0);
+        const afp const_max = afp(activation_param_1);
+        v[0] = clamp(v[0], const_min, const_max);
+        v[1] = clamp(v[1], const_min, const_max);
+    }
+    if (activation_type == 4) // sigmoid: 1 / (1 + exp(-v))
+    {
+        v[0] = afp(1.f) / (afp(1.f) + exp(-v[0]));
+        v[1] = afp(1.f) / (afp(1.f) + exp(-v[1]));
+    }
+    if (activation_type == 5) // mish: v * tanh(log(exp(v) + 1))
+    {
+#if NCNN_moltenvk
+        v[0] = v[0] * afpvec4(tanh(vec4(log(exp(v[0]) + afp(1.f))))); // tanh is evaluated on a vec4 and cast back to afpvec4
+        v[1] = v[1] * afpvec4(tanh(vec4(log(exp(v[1]) + afp(1.f)))));
+#else
+        v[0] = v[0] * tanh(log(exp(v[0]) + afp(1.f)));
+        v[1] = v[1] * tanh(log(exp(v[1]) + afp(1.f)));
+#endif
+    }
+    if (activation_type == 6) // hard-swish form: v * clamp(v * alpha + beta, 0, 1)
+    {
+        const afp alpha = afp(activation_param_0);
+        const afp beta = afp(activation_param_1);
+        v[0] = v[0] * clamp(v[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f));
+        v[1] = v[1] * clamp(v[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f));
+    }
+
+    return v;
+}
+
+#endif // NCNN_VULKAN_ACTIVATION_COMP