| @@ -5,7 +5,9 @@ file(READ ${SHADER_SRC} comp_data) | |||
# skip leading comment
# Everything in front of the first "#version" directive is dropped. string(FIND)
# stores -1 in version_start when the directive is absent; a negative begin index
# is not a valid string(SUBSTRING) argument, so such inputs are left untouched.
# The trim is applied exactly once: version_start is an offset into the text as
# it was read, not into an already-trimmed copy.
string(FIND "${comp_data}" "#version" version_start)
if(NOT version_start EQUAL -1)
    string(SUBSTRING "${comp_data}" ${version_start} -1 comp_data)
endif()

# remove whitespace
# Strips the run of spaces that follows each newline (i.e. line indentation).
string(REGEX REPLACE "\n +" "\n" comp_data "${comp_data}")
| @@ -157,6 +157,7 @@ ncnn_add_layer(DeconvolutionDepthWise3D) | |||
# Register the Vulkan shader sources, in this order, when Vulkan support is on.
if(NCNN_VULKAN)
    foreach(shader_path IN ITEMS
            "${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp"
            "${CMAKE_CURRENT_SOURCE_DIR}/layer/vulkan/shader/vulkan_activation.comp")
        ncnn_add_shader("${shader_path}")
    endforeach()
endif()

# Umbrella target that depends on every file listed in NCNN_SHADER_SPV_HEX_FILES.
add_custom_target(
    ncnn-generate-spirv
    DEPENDS ${NCNN_SHADER_SPV_HEX_FILES}
)
| @@ -23,6 +23,8 @@ | |||
| #include "glslang/SPIRV/GlslangToSpv.h" | |||
| #include "glslang/glslang/Public/ShaderLang.h" | |||
| #include "vulkan_activation.comp.hex.h" | |||
| #include "command.h" | |||
| #include "layer.h" | |||
| #include "layer/vulkan/packing_vulkan.h" | |||
| @@ -1725,7 +1727,7 @@ int VulkanDevicePrivate::create_dummy_buffer_image() | |||
| dummy_buffer.create(1, 4u, dummy_allocator); | |||
| dummy_image.create(1, 4u, dummy_allocator); | |||
| #if __APPLE__ | |||
| if (vkdev->info.vendor_id() != 0x8086) | |||
| if (vkdev->info.type() == 0) | |||
| dummy_image_readonly.create(1, 4u, dummy_allocator); | |||
| #else | |||
| dummy_image_readonly.create(1, 4u, dummy_allocator); | |||
| @@ -1736,7 +1738,7 @@ int VulkanDevicePrivate::create_dummy_buffer_image() | |||
| cmd.record_dummy(dummy_buffer); | |||
| cmd.record_dummy(dummy_image); | |||
| #if __APPLE__ | |||
| if (vkdev->info.vendor_id() != 0x8086) | |||
| if (vkdev->info.type() == 0) | |||
| cmd.record_dummy_readonly(dummy_image_readonly); | |||
| #else | |||
| cmd.record_dummy_readonly(dummy_image_readonly); | |||
| @@ -1752,7 +1754,7 @@ void VulkanDevicePrivate::destroy_dummy_buffer_image() | |||
| dummy_buffer.release(); | |||
| dummy_image.release(); | |||
| #if __APPLE__ | |||
| if (vkdev->info.vendor_id() != 0x8086) | |||
| if (vkdev->info.type() == 0) | |||
| dummy_image_readonly.release(); | |||
| #else | |||
| dummy_image_readonly.release(); | |||
| @@ -2818,7 +2820,7 @@ VkImageMat VulkanDevice::get_dummy_image() const | |||
| VkImageMat VulkanDevice::get_dummy_image_readonly() const | |||
| { | |||
| #if __APPLE__ | |||
| if (info.vendor_id() == 0x8086) | |||
| if (info.type() != 0) | |||
| return d->dummy_image; | |||
| #endif | |||
| return d->dummy_image_readonly; | |||
| @@ -3245,6 +3247,28 @@ static TBuiltInResource get_default_TBuiltInResource() | |||
| return resource; | |||
| } | |||
| class VulkanShaderIncluder : public glslang::TShader::Includer | |||
| { | |||
| public: | |||
| virtual glslang::TShader::Includer::IncludeResult* includeLocal(const char* headerName, const char* /*includerName*/, size_t /*inclusionDepth*/) | |||
| { | |||
| if (strcmp(headerName, "vulkan_activation.comp") == 0) | |||
| { | |||
| const char* const headerData = vulkan_activation_comp_data; | |||
| const size_t headerLength = sizeof(vulkan_activation_comp_data); | |||
| glslang::TShader::Includer::IncludeResult* r = new glslang::TShader::Includer::IncludeResult(headerName, headerData, headerLength, 0); | |||
| return r; | |||
| } | |||
| return 0; | |||
| } | |||
| virtual void releaseInclude(glslang::TShader::Includer::IncludeResult* r) | |||
| { | |||
| delete r; | |||
| } | |||
| }; | |||
| int compile_spirv_module(const char* comp_string, const Option& opt, std::vector<uint32_t>& spirv) | |||
| { | |||
| // -1 for omitting the tail '\0' | |||
| @@ -3699,6 +3723,10 @@ int compile_spirv_module(const char* comp_data, int comp_data_size, const Option | |||
| custom_defines.push_back(std::make_pair("NCNN_shader_local_memory", "1")); | |||
| } | |||
| #if __APPLE__ | |||
| custom_defines.push_back(std::make_pair("NCNN_moltenvk", "1")); | |||
| #endif | |||
| std::string preamble; | |||
| std::vector<std::string> processes; | |||
| @@ -3740,7 +3768,9 @@ int compile_spirv_module(const char* comp_data, int comp_data_size, const Option | |||
| TBuiltInResource resources = get_default_TBuiltInResource(); | |||
| bool pr = s.parse(&resources, 100, false, EShMsgDefault); | |||
| VulkanShaderIncluder includer; | |||
| bool pr = s.parse(&resources, 100, ENoProfile, false, false, EShMsgDefault, includer); | |||
| if (!pr) | |||
| { | |||
| NCNN_LOGE("compile spir-v module failed"); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -195,77 +198,14 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| sum4 = max(sum4, afp(0.f)); | |||
| sum5 = max(sum5, afp(0.f)); | |||
| sum6 = max(sum6, afp(0.f)); | |||
| sum7 = max(sum7, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; | |||
| sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; | |||
| sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; | |||
| sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; | |||
| sum4 = sum4 < afp(0.f) ? sum4 * slope : sum4; | |||
| sum5 = sum5 < afp(0.f) ? sum5 * slope : sum5; | |||
| sum6 = sum6 < afp(0.f) ? sum6 * slope : sum6; | |||
| sum7 = sum7 < afp(0.f) ? sum7 * slope : sum7; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| sum4 = clamp(sum4, const_min, const_max); | |||
| sum5 = clamp(sum5, const_min, const_max); | |||
| sum6 = clamp(sum6, const_min, const_max); | |||
| sum7 = clamp(sum7, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); | |||
| sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); | |||
| sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); | |||
| sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); | |||
| sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); | |||
| sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); | |||
| sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7 = sum7 * clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); | |||
| sum4 = activation_afp(sum4, activation_type, activation_param_0, activation_param_1); | |||
| sum5 = activation_afp(sum5, activation_type, activation_param_0, activation_param_1); | |||
| sum6 = activation_afp(sum6, activation_type, activation_param_0, activation_param_1); | |||
| sum7 = activation_afp(sum7, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -130,36 +133,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum.r); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -149,53 +152,10 @@ void main() | |||
| v11 = m11 - m12 + m13; | |||
| } | |||
| if (activation_type == 1) | |||
| { | |||
| v00 = max(v00, afp(0.f)); | |||
| v10 = max(v10, afp(0.f)); | |||
| v01 = max(v01, afp(0.f)); | |||
| v11 = max(v11, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| v00 = v00 < afp(0.f) ? v00 * slope : v00; | |||
| v10 = v10 < afp(0.f) ? v10 * slope : v10; | |||
| v01 = v01 < afp(0.f) ? v01 * slope : v01; | |||
| v11 = v11 < afp(0.f) ? v11 * slope : v11; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| v00 = clamp(v00, const_min, const_max); | |||
| v10 = clamp(v10, const_min, const_max); | |||
| v01 = clamp(v01, const_min, const_max); | |||
| v11 = clamp(v11, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| v00 = afp(1.f) / (afp(1.f) + exp(-v00)); | |||
| v10 = afp(1.f) / (afp(1.f) + exp(-v10)); | |||
| v01 = afp(1.f) / (afp(1.f) + exp(-v01)); | |||
| v11 = afp(1.f) / (afp(1.f) + exp(-v11)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| v00 = v00 * tanh(log(exp(v00) + afp(1.f))); | |||
| v01 = v01 * tanh(log(exp(v01) + afp(1.f))); | |||
| v10 = v10 * tanh(log(exp(v10) + afp(1.f))); | |||
| v11 = v11 * tanh(log(exp(v11) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| v00 = v00 * clamp(v00 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v01 = v01 * clamp(v01 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v10 = v10 * clamp(v10 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v11 = v11 * clamp(v11 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| v00 = activation_afp(v00, activation_type, activation_param_0, activation_param_1); | |||
| v10 = activation_afp(v10, activation_type, activation_param_0, activation_param_1); | |||
| v01 = activation_afp(v01, activation_type, activation_param_0, activation_param_1); | |||
| v11 = activation_afp(v11, activation_type, activation_param_0, activation_param_1); | |||
| // store 2x2 | |||
| int x = gx * 2; | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -237,125 +240,22 @@ void main() | |||
| v33 = bias_value + v33; | |||
| } | |||
| if (activation_type == 1) | |||
| { | |||
| v00 = max(v00, afp(0.f)); | |||
| v01 = max(v01, afp(0.f)); | |||
| v02 = max(v02, afp(0.f)); | |||
| v03 = max(v03, afp(0.f)); | |||
| v10 = max(v10, afp(0.f)); | |||
| v11 = max(v11, afp(0.f)); | |||
| v12 = max(v12, afp(0.f)); | |||
| v13 = max(v13, afp(0.f)); | |||
| v20 = max(v20, afp(0.f)); | |||
| v21 = max(v21, afp(0.f)); | |||
| v22 = max(v22, afp(0.f)); | |||
| v23 = max(v23, afp(0.f)); | |||
| v30 = max(v30, afp(0.f)); | |||
| v31 = max(v31, afp(0.f)); | |||
| v32 = max(v32, afp(0.f)); | |||
| v33 = max(v33, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| v00 = v00 < afp(0.f) ? v00 * slope : v00; | |||
| v01 = v01 < afp(0.f) ? v01 * slope : v01; | |||
| v02 = v02 < afp(0.f) ? v02 * slope : v02; | |||
| v03 = v03 < afp(0.f) ? v03 * slope : v03; | |||
| v10 = v10 < afp(0.f) ? v10 * slope : v10; | |||
| v11 = v11 < afp(0.f) ? v11 * slope : v11; | |||
| v12 = v12 < afp(0.f) ? v12 * slope : v12; | |||
| v13 = v13 < afp(0.f) ? v13 * slope : v13; | |||
| v20 = v20 < afp(0.f) ? v20 * slope : v20; | |||
| v21 = v21 < afp(0.f) ? v21 * slope : v21; | |||
| v22 = v22 < afp(0.f) ? v22 * slope : v22; | |||
| v23 = v23 < afp(0.f) ? v23 * slope : v23; | |||
| v30 = v30 < afp(0.f) ? v30 * slope : v30; | |||
| v31 = v31 < afp(0.f) ? v31 * slope : v31; | |||
| v32 = v32 < afp(0.f) ? v32 * slope : v32; | |||
| v33 = v33 < afp(0.f) ? v33 * slope : v33; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| v00 = clamp(v00, const_min, const_max); | |||
| v01 = clamp(v01, const_min, const_max); | |||
| v02 = clamp(v02, const_min, const_max); | |||
| v03 = clamp(v03, const_min, const_max); | |||
| v10 = clamp(v10, const_min, const_max); | |||
| v11 = clamp(v11, const_min, const_max); | |||
| v12 = clamp(v12, const_min, const_max); | |||
| v13 = clamp(v13, const_min, const_max); | |||
| v20 = clamp(v20, const_min, const_max); | |||
| v21 = clamp(v21, const_min, const_max); | |||
| v22 = clamp(v22, const_min, const_max); | |||
| v23 = clamp(v23, const_min, const_max); | |||
| v30 = clamp(v30, const_min, const_max); | |||
| v31 = clamp(v31, const_min, const_max); | |||
| v32 = clamp(v32, const_min, const_max); | |||
| v33 = clamp(v33, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| v00 = afp(1.f) / (afp(1.f) + exp(-v00)); | |||
| v01 = afp(1.f) / (afp(1.f) + exp(-v01)); | |||
| v02 = afp(1.f) / (afp(1.f) + exp(-v02)); | |||
| v03 = afp(1.f) / (afp(1.f) + exp(-v03)); | |||
| v10 = afp(1.f) / (afp(1.f) + exp(-v10)); | |||
| v11 = afp(1.f) / (afp(1.f) + exp(-v11)); | |||
| v12 = afp(1.f) / (afp(1.f) + exp(-v12)); | |||
| v13 = afp(1.f) / (afp(1.f) + exp(-v13)); | |||
| v20 = afp(1.f) / (afp(1.f) + exp(-v20)); | |||
| v21 = afp(1.f) / (afp(1.f) + exp(-v21)); | |||
| v22 = afp(1.f) / (afp(1.f) + exp(-v22)); | |||
| v23 = afp(1.f) / (afp(1.f) + exp(-v23)); | |||
| v30 = afp(1.f) / (afp(1.f) + exp(-v30)); | |||
| v31 = afp(1.f) / (afp(1.f) + exp(-v31)); | |||
| v32 = afp(1.f) / (afp(1.f) + exp(-v32)); | |||
| v33 = afp(1.f) / (afp(1.f) + exp(-v33)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| v00 = v00 * tanh(log(exp(v00) + afp(1.f))); | |||
| v01 = v01 * tanh(log(exp(v01) + afp(1.f))); | |||
| v02 = v02 * tanh(log(exp(v02) + afp(1.f))); | |||
| v03 = v03 * tanh(log(exp(v03) + afp(1.f))); | |||
| v10 = v10 * tanh(log(exp(v10) + afp(1.f))); | |||
| v11 = v11 * tanh(log(exp(v11) + afp(1.f))); | |||
| v12 = v12 * tanh(log(exp(v12) + afp(1.f))); | |||
| v13 = v13 * tanh(log(exp(v13) + afp(1.f))); | |||
| v20 = v20 * tanh(log(exp(v20) + afp(1.f))); | |||
| v21 = v21 * tanh(log(exp(v21) + afp(1.f))); | |||
| v22 = v22 * tanh(log(exp(v22) + afp(1.f))); | |||
| v23 = v23 * tanh(log(exp(v23) + afp(1.f))); | |||
| v30 = v30 * tanh(log(exp(v30) + afp(1.f))); | |||
| v31 = v31 * tanh(log(exp(v31) + afp(1.f))); | |||
| v32 = v32 * tanh(log(exp(v32) + afp(1.f))); | |||
| v33 = v33 * tanh(log(exp(v33) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| v00 = v00 * clamp(v00 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v01 = v01 * clamp(v01 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v02 = v02 * clamp(v02 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v03 = v03 * clamp(v03 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v10 = v10 * clamp(v10 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v11 = v11 * clamp(v11 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v12 = v12 * clamp(v12 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v13 = v13 * clamp(v13 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v20 = v20 * clamp(v20 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v21 = v21 * clamp(v21 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v22 = v22 * clamp(v22 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v23 = v23 * clamp(v23 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v30 = v30 * clamp(v30 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v31 = v31 * clamp(v31 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v32 = v32 * clamp(v32 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v33 = v33 * clamp(v33 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| v00 = activation_afp(v00, activation_type, activation_param_0, activation_param_1); | |||
| v01 = activation_afp(v01, activation_type, activation_param_0, activation_param_1); | |||
| v02 = activation_afp(v02, activation_type, activation_param_0, activation_param_1); | |||
| v03 = activation_afp(v03, activation_type, activation_param_0, activation_param_1); | |||
| v10 = activation_afp(v10, activation_type, activation_param_0, activation_param_1); | |||
| v11 = activation_afp(v11, activation_type, activation_param_0, activation_param_1); | |||
| v12 = activation_afp(v12, activation_type, activation_param_0, activation_param_1); | |||
| v13 = activation_afp(v13, activation_type, activation_param_0, activation_param_1); | |||
| v20 = activation_afp(v20, activation_type, activation_param_0, activation_param_1); | |||
| v21 = activation_afp(v21, activation_type, activation_param_0, activation_param_1); | |||
| v22 = activation_afp(v22, activation_type, activation_param_0, activation_param_1); | |||
| v23 = activation_afp(v23, activation_type, activation_param_0, activation_param_1); | |||
| v30 = activation_afp(v30, activation_type, activation_param_0, activation_param_1); | |||
| v31 = activation_afp(v31, activation_type, activation_param_0, activation_param_1); | |||
| v32 = activation_afp(v32, activation_type, activation_param_0, activation_param_1); | |||
| v33 = activation_afp(v33, activation_type, activation_param_0, activation_param_1); | |||
| // store 4x4 | |||
| int x = gx * 4; | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| #define LOCAL_MEMORY_UNROLL_INCH 8 | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| @@ -280,53 +283,10 @@ void main() | |||
| return; | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; | |||
| sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; | |||
| sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; | |||
| sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -195,77 +198,14 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| sum4 = max(sum4, afp(0.f)); | |||
| sum5 = max(sum5, afp(0.f)); | |||
| sum6 = max(sum6, afp(0.f)); | |||
| sum7 = max(sum7, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| sum4 = mix(sum4, sum4 * afp(slope), lessThan(sum4, afpvec4(0.f))); | |||
| sum5 = mix(sum5, sum5 * afp(slope), lessThan(sum5, afpvec4(0.f))); | |||
| sum6 = mix(sum6, sum6 * afp(slope), lessThan(sum6, afpvec4(0.f))); | |||
| sum7 = mix(sum7, sum7 * afp(slope), lessThan(sum7, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| sum4 = clamp(sum4, const_min, const_max); | |||
| sum5 = clamp(sum5, const_min, const_max); | |||
| sum6 = clamp(sum6, const_min, const_max); | |||
| sum7 = clamp(sum7, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); | |||
| sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); | |||
| sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); | |||
| sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); | |||
| sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); | |||
| sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); | |||
| sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7 = sum7 * clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| sum4 = activation_afpvec4(sum4, activation_type, activation_param_0, activation_param_1); | |||
| sum5 = activation_afpvec4(sum5, activation_type, activation_param_0, activation_param_1); | |||
| sum6 = activation_afpvec4(sum6, activation_type, activation_param_0, activation_param_1); | |||
| sum7 = activation_afpvec4(sum7, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| #define LOCAL_MEMORY_UNROLL_INCH 8 | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| @@ -237,53 +240,10 @@ void main() | |||
| #endif | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| #define LOCAL_MEMORY_UNROLL_INCH 8 | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| @@ -280,53 +283,10 @@ void main() | |||
| return; | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -214,125 +217,14 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0[0] = max(sum0[0], afp(0.f)); | |||
| sum0[1] = max(sum0[1], afp(0.f)); | |||
| sum1[0] = max(sum1[0], afp(0.f)); | |||
| sum1[1] = max(sum1[1], afp(0.f)); | |||
| sum2[0] = max(sum2[0], afp(0.f)); | |||
| sum2[1] = max(sum2[1], afp(0.f)); | |||
| sum3[0] = max(sum3[0], afp(0.f)); | |||
| sum3[1] = max(sum3[1], afp(0.f)); | |||
| sum4[0] = max(sum4[0], afp(0.f)); | |||
| sum4[1] = max(sum4[1], afp(0.f)); | |||
| sum5[0] = max(sum5[0], afp(0.f)); | |||
| sum5[1] = max(sum5[1], afp(0.f)); | |||
| sum6[0] = max(sum6[0], afp(0.f)); | |||
| sum6[1] = max(sum6[1], afp(0.f)); | |||
| sum7[0] = max(sum7[0], afp(0.f)); | |||
| sum7[1] = max(sum7[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); | |||
| sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); | |||
| sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); | |||
| sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); | |||
| sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); | |||
| sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); | |||
| sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); | |||
| sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); | |||
| sum4[0] = mix(sum4[0], sum4[0] * afp(slope), lessThan(sum4[0], afpvec4(0.f))); | |||
| sum4[1] = mix(sum4[1], sum4[1] * afp(slope), lessThan(sum4[1], afpvec4(0.f))); | |||
| sum5[0] = mix(sum5[0], sum5[0] * afp(slope), lessThan(sum5[0], afpvec4(0.f))); | |||
| sum5[1] = mix(sum5[1], sum5[1] * afp(slope), lessThan(sum5[1], afpvec4(0.f))); | |||
| sum6[0] = mix(sum6[0], sum6[0] * afp(slope), lessThan(sum6[0], afpvec4(0.f))); | |||
| sum6[1] = mix(sum6[1], sum6[1] * afp(slope), lessThan(sum6[1], afpvec4(0.f))); | |||
| sum7[0] = mix(sum7[0], sum7[0] * afp(slope), lessThan(sum7[0], afpvec4(0.f))); | |||
| sum7[1] = mix(sum7[1], sum7[1] * afp(slope), lessThan(sum7[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0[0] = clamp(sum0[0], const_min, const_max); | |||
| sum0[1] = clamp(sum0[1], const_min, const_max); | |||
| sum1[0] = clamp(sum1[0], const_min, const_max); | |||
| sum1[1] = clamp(sum1[1], const_min, const_max); | |||
| sum2[0] = clamp(sum2[0], const_min, const_max); | |||
| sum2[1] = clamp(sum2[1], const_min, const_max); | |||
| sum3[0] = clamp(sum3[0], const_min, const_max); | |||
| sum3[1] = clamp(sum3[1], const_min, const_max); | |||
| sum4[0] = clamp(sum4[0], const_min, const_max); | |||
| sum4[1] = clamp(sum4[1], const_min, const_max); | |||
| sum5[0] = clamp(sum5[0], const_min, const_max); | |||
| sum5[1] = clamp(sum5[1], const_min, const_max); | |||
| sum6[0] = clamp(sum6[0], const_min, const_max); | |||
| sum6[1] = clamp(sum6[1], const_min, const_max); | |||
| sum7[0] = clamp(sum7[0], const_min, const_max); | |||
| sum7[1] = clamp(sum7[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); | |||
| sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); | |||
| sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); | |||
| sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); | |||
| sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); | |||
| sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); | |||
| sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); | |||
| sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); | |||
| sum4[0] = afp(1.f) / (afp(1.f) + exp(-sum4[0])); | |||
| sum4[1] = afp(1.f) / (afp(1.f) + exp(-sum4[1])); | |||
| sum5[0] = afp(1.f) / (afp(1.f) + exp(-sum5[0])); | |||
| sum5[1] = afp(1.f) / (afp(1.f) + exp(-sum5[1])); | |||
| sum6[0] = afp(1.f) / (afp(1.f) + exp(-sum6[0])); | |||
| sum6[1] = afp(1.f) / (afp(1.f) + exp(-sum6[1])); | |||
| sum7[0] = afp(1.f) / (afp(1.f) + exp(-sum7[0])); | |||
| sum7[1] = afp(1.f) / (afp(1.f) + exp(-sum7[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); | |||
| sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); | |||
| sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); | |||
| sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); | |||
| sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); | |||
| sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); | |||
| sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); | |||
| sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); | |||
| sum4[0] = sum4[0] * tanh(log(exp(sum4[0]) + afp(1.f))); | |||
| sum4[1] = sum4[1] * tanh(log(exp(sum4[1]) + afp(1.f))); | |||
| sum5[0] = sum5[0] * tanh(log(exp(sum5[0]) + afp(1.f))); | |||
| sum5[1] = sum5[1] * tanh(log(exp(sum5[1]) + afp(1.f))); | |||
| sum6[0] = sum6[0] * tanh(log(exp(sum6[0]) + afp(1.f))); | |||
| sum6[1] = sum6[1] * tanh(log(exp(sum6[1]) + afp(1.f))); | |||
| sum7[0] = sum7[0] * tanh(log(exp(sum7[0]) + afp(1.f))); | |||
| sum7[1] = sum7[1] * tanh(log(exp(sum7[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4[0] = sum4[0] * clamp(sum4[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4[1] = sum4[1] * clamp(sum4[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5[0] = sum5[0] * clamp(sum5[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5[1] = sum5[1] * clamp(sum5[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6[0] = sum6[0] * clamp(sum6[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6[1] = sum6[1] * clamp(sum6[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7[0] = sum7[0] * clamp(sum7[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7[1] = sum7[1] * clamp(sum7[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); | |||
| sum4 = activation_afpvec8(sum4, activation_type, activation_param_0, activation_param_1); | |||
| sum5 = activation_afpvec8(sum5, activation_type, activation_param_0, activation_param_1); | |||
| sum6 = activation_afpvec8(sum6, activation_type, activation_param_0, activation_param_1); | |||
| sum7 = activation_afpvec8(sum7, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -160,77 +163,10 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0[0] = max(sum0[0], afp(0.f)); | |||
| sum0[1] = max(sum0[1], afp(0.f)); | |||
| sum1[0] = max(sum1[0], afp(0.f)); | |||
| sum1[1] = max(sum1[1], afp(0.f)); | |||
| sum2[0] = max(sum2[0], afp(0.f)); | |||
| sum2[1] = max(sum2[1], afp(0.f)); | |||
| sum3[0] = max(sum3[0], afp(0.f)); | |||
| sum3[1] = max(sum3[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); | |||
| sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); | |||
| sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); | |||
| sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); | |||
| sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); | |||
| sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); | |||
| sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); | |||
| sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0[0] = clamp(sum0[0], const_min, const_max); | |||
| sum0[1] = clamp(sum0[1], const_min, const_max); | |||
| sum1[0] = clamp(sum1[0], const_min, const_max); | |||
| sum1[1] = clamp(sum1[1], const_min, const_max); | |||
| sum2[0] = clamp(sum2[0], const_min, const_max); | |||
| sum2[1] = clamp(sum2[1], const_min, const_max); | |||
| sum3[0] = clamp(sum3[0], const_min, const_max); | |||
| sum3[1] = clamp(sum3[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); | |||
| sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); | |||
| sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); | |||
| sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); | |||
| sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); | |||
| sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); | |||
| sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); | |||
| sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); | |||
| sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); | |||
| sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); | |||
| sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); | |||
| sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); | |||
| sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); | |||
| sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); | |||
| sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -183,77 +186,10 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0[0] = max(sum0[0], afp(0.f)); | |||
| sum0[1] = max(sum0[1], afp(0.f)); | |||
| sum1[0] = max(sum1[0], afp(0.f)); | |||
| sum1[1] = max(sum1[1], afp(0.f)); | |||
| sum2[0] = max(sum2[0], afp(0.f)); | |||
| sum2[1] = max(sum2[1], afp(0.f)); | |||
| sum3[0] = max(sum3[0], afp(0.f)); | |||
| sum3[1] = max(sum3[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); | |||
| sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); | |||
| sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); | |||
| sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); | |||
| sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); | |||
| sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); | |||
| sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); | |||
| sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0[0] = clamp(sum0[0], const_min, const_max); | |||
| sum0[1] = clamp(sum0[1], const_min, const_max); | |||
| sum1[0] = clamp(sum1[0], const_min, const_max); | |||
| sum1[1] = clamp(sum1[1], const_min, const_max); | |||
| sum2[0] = clamp(sum2[0], const_min, const_max); | |||
| sum2[1] = clamp(sum2[1], const_min, const_max); | |||
| sum3[0] = clamp(sum3[0], const_min, const_max); | |||
| sum3[1] = clamp(sum3[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); | |||
| sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); | |||
| sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); | |||
| sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); | |||
| sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); | |||
| sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); | |||
| sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); | |||
| sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); | |||
| sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); | |||
| sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); | |||
| sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); | |||
| sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); | |||
| sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); | |||
| sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); | |||
| sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -226,77 +229,14 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| sum4 = max(sum4, afp(0.f)); | |||
| sum5 = max(sum5, afp(0.f)); | |||
| sum6 = max(sum6, afp(0.f)); | |||
| sum7 = max(sum7, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| sum4 = mix(sum4, sum4 * afp(slope), lessThan(sum4, afpvec4(0.f))); | |||
| sum5 = mix(sum5, sum5 * afp(slope), lessThan(sum5, afpvec4(0.f))); | |||
| sum6 = mix(sum6, sum6 * afp(slope), lessThan(sum6, afpvec4(0.f))); | |||
| sum7 = mix(sum7, sum7 * afp(slope), lessThan(sum7, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| sum4 = clamp(sum4, const_min, const_max); | |||
| sum5 = clamp(sum5, const_min, const_max); | |||
| sum6 = clamp(sum6, const_min, const_max); | |||
| sum7 = clamp(sum7, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); | |||
| sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); | |||
| sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); | |||
| sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); | |||
| sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); | |||
| sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); | |||
| sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7 = sum7 * clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| sum4 = activation_afpvec4(sum4, activation_type, activation_param_0, activation_param_1); | |||
| sum5 = activation_afpvec4(sum5, activation_type, activation_param_0, activation_param_1); | |||
| sum6 = activation_afpvec4(sum6, activation_type, activation_param_0, activation_param_1); | |||
| sum7 = activation_afpvec4(sum7, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| #define LOCAL_MEMORY_UNROLL_INCH 8 | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| @@ -257,53 +260,10 @@ void main() | |||
| #endif | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| #extension GL_KHR_memory_scope_semantics: require | |||
| #extension GL_EXT_shader_explicit_arithmetic_types: require | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| @@ -238,53 +241,10 @@ void main() | |||
| afpvec4 sum2 = afpvec4(unpackHalf2x16(sum2_u2.x), unpackHalf2x16(sum2_u2.y)); | |||
| afpvec4 sum3 = afpvec4(unpackHalf2x16(sum3_u2.x), unpackHalf2x16(sum3_u2.y)); | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| int gi = gy * psc(outcstep) + gx + lxd16 * psc(outcstep) + lxm16; | |||
| { | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -149,53 +152,10 @@ void main() | |||
| v11 = m11 - m12 + m13; | |||
| } | |||
| if (activation_type == 1) | |||
| { | |||
| v00 = max(v00, afp(0.f)); | |||
| v10 = max(v10, afp(0.f)); | |||
| v01 = max(v01, afp(0.f)); | |||
| v11 = max(v11, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| v00 = mix(v00, v00 * afp(slope), lessThan(v00, afpvec4(0.f))); | |||
| v10 = mix(v10, v10 * afp(slope), lessThan(v10, afpvec4(0.f))); | |||
| v01 = mix(v01, v01 * afp(slope), lessThan(v01, afpvec4(0.f))); | |||
| v11 = mix(v11, v11 * afp(slope), lessThan(v11, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| v00 = clamp(v00, const_min, const_max); | |||
| v10 = clamp(v10, const_min, const_max); | |||
| v01 = clamp(v01, const_min, const_max); | |||
| v11 = clamp(v11, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| v00 = afp(1.f) / (afp(1.f) + exp(-v00)); | |||
| v10 = afp(1.f) / (afp(1.f) + exp(-v10)); | |||
| v01 = afp(1.f) / (afp(1.f) + exp(-v01)); | |||
| v11 = afp(1.f) / (afp(1.f) + exp(-v11)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| v00 = v00 * tanh(log(exp(v00) + afp(1.f))); | |||
| v01 = v01 * tanh(log(exp(v01) + afp(1.f))); | |||
| v10 = v10 * tanh(log(exp(v10) + afp(1.f))); | |||
| v11 = v11 * tanh(log(exp(v11) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| v00 = v00 * clamp(v00 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v01 = v01 * clamp(v01 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v10 = v10 * clamp(v10 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v11 = v11 * clamp(v11 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| v00 = activation_afpvec4(v00, activation_type, activation_param_0, activation_param_1); | |||
| v01 = activation_afpvec4(v01, activation_type, activation_param_0, activation_param_1); | |||
| v10 = activation_afpvec4(v10, activation_type, activation_param_0, activation_param_1); | |||
| v11 = activation_afpvec4(v11, activation_type, activation_param_0, activation_param_1); | |||
| // store 2x2 | |||
| int x = gx * 2; | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -237,125 +240,22 @@ void main() | |||
| v33 = bias_value + v33; | |||
| } | |||
| if (activation_type == 1) | |||
| { | |||
| v00 = max(v00, afp(0.f)); | |||
| v01 = max(v01, afp(0.f)); | |||
| v02 = max(v02, afp(0.f)); | |||
| v03 = max(v03, afp(0.f)); | |||
| v10 = max(v10, afp(0.f)); | |||
| v11 = max(v11, afp(0.f)); | |||
| v12 = max(v12, afp(0.f)); | |||
| v13 = max(v13, afp(0.f)); | |||
| v20 = max(v20, afp(0.f)); | |||
| v21 = max(v21, afp(0.f)); | |||
| v22 = max(v22, afp(0.f)); | |||
| v23 = max(v23, afp(0.f)); | |||
| v30 = max(v30, afp(0.f)); | |||
| v31 = max(v31, afp(0.f)); | |||
| v32 = max(v32, afp(0.f)); | |||
| v33 = max(v33, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| v00 = mix(v00, v00 * afp(slope), lessThan(v00, afpvec4(0.f))); | |||
| v01 = mix(v01, v01 * afp(slope), lessThan(v01, afpvec4(0.f))); | |||
| v02 = mix(v02, v02 * afp(slope), lessThan(v02, afpvec4(0.f))); | |||
| v03 = mix(v03, v03 * afp(slope), lessThan(v03, afpvec4(0.f))); | |||
| v10 = mix(v10, v10 * afp(slope), lessThan(v10, afpvec4(0.f))); | |||
| v11 = mix(v11, v11 * afp(slope), lessThan(v11, afpvec4(0.f))); | |||
| v12 = mix(v12, v12 * afp(slope), lessThan(v12, afpvec4(0.f))); | |||
| v13 = mix(v13, v13 * afp(slope), lessThan(v13, afpvec4(0.f))); | |||
| v20 = mix(v20, v20 * afp(slope), lessThan(v20, afpvec4(0.f))); | |||
| v21 = mix(v21, v21 * afp(slope), lessThan(v21, afpvec4(0.f))); | |||
| v22 = mix(v22, v22 * afp(slope), lessThan(v22, afpvec4(0.f))); | |||
| v23 = mix(v23, v23 * afp(slope), lessThan(v23, afpvec4(0.f))); | |||
| v30 = mix(v30, v30 * afp(slope), lessThan(v30, afpvec4(0.f))); | |||
| v31 = mix(v31, v31 * afp(slope), lessThan(v31, afpvec4(0.f))); | |||
| v32 = mix(v32, v32 * afp(slope), lessThan(v32, afpvec4(0.f))); | |||
| v33 = mix(v33, v33 * afp(slope), lessThan(v33, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| v00 = clamp(v00, const_min, const_max); | |||
| v01 = clamp(v01, const_min, const_max); | |||
| v02 = clamp(v02, const_min, const_max); | |||
| v03 = clamp(v03, const_min, const_max); | |||
| v10 = clamp(v10, const_min, const_max); | |||
| v11 = clamp(v11, const_min, const_max); | |||
| v12 = clamp(v12, const_min, const_max); | |||
| v13 = clamp(v13, const_min, const_max); | |||
| v20 = clamp(v20, const_min, const_max); | |||
| v21 = clamp(v21, const_min, const_max); | |||
| v22 = clamp(v22, const_min, const_max); | |||
| v23 = clamp(v23, const_min, const_max); | |||
| v30 = clamp(v30, const_min, const_max); | |||
| v31 = clamp(v31, const_min, const_max); | |||
| v32 = clamp(v32, const_min, const_max); | |||
| v33 = clamp(v33, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| v00 = afp(1.f) / (afp(1.f) + exp(-v00)); | |||
| v01 = afp(1.f) / (afp(1.f) + exp(-v01)); | |||
| v02 = afp(1.f) / (afp(1.f) + exp(-v02)); | |||
| v03 = afp(1.f) / (afp(1.f) + exp(-v03)); | |||
| v10 = afp(1.f) / (afp(1.f) + exp(-v10)); | |||
| v11 = afp(1.f) / (afp(1.f) + exp(-v11)); | |||
| v12 = afp(1.f) / (afp(1.f) + exp(-v12)); | |||
| v13 = afp(1.f) / (afp(1.f) + exp(-v13)); | |||
| v20 = afp(1.f) / (afp(1.f) + exp(-v20)); | |||
| v21 = afp(1.f) / (afp(1.f) + exp(-v21)); | |||
| v22 = afp(1.f) / (afp(1.f) + exp(-v22)); | |||
| v23 = afp(1.f) / (afp(1.f) + exp(-v23)); | |||
| v30 = afp(1.f) / (afp(1.f) + exp(-v30)); | |||
| v31 = afp(1.f) / (afp(1.f) + exp(-v31)); | |||
| v32 = afp(1.f) / (afp(1.f) + exp(-v32)); | |||
| v33 = afp(1.f) / (afp(1.f) + exp(-v33)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| v00 = v00 * tanh(log(exp(v00) + afp(1.f))); | |||
| v01 = v01 * tanh(log(exp(v01) + afp(1.f))); | |||
| v02 = v02 * tanh(log(exp(v02) + afp(1.f))); | |||
| v03 = v03 * tanh(log(exp(v03) + afp(1.f))); | |||
| v10 = v10 * tanh(log(exp(v10) + afp(1.f))); | |||
| v11 = v11 * tanh(log(exp(v11) + afp(1.f))); | |||
| v12 = v12 * tanh(log(exp(v12) + afp(1.f))); | |||
| v13 = v13 * tanh(log(exp(v13) + afp(1.f))); | |||
| v20 = v20 * tanh(log(exp(v20) + afp(1.f))); | |||
| v21 = v21 * tanh(log(exp(v21) + afp(1.f))); | |||
| v22 = v22 * tanh(log(exp(v22) + afp(1.f))); | |||
| v23 = v23 * tanh(log(exp(v23) + afp(1.f))); | |||
| v30 = v30 * tanh(log(exp(v30) + afp(1.f))); | |||
| v31 = v31 * tanh(log(exp(v31) + afp(1.f))); | |||
| v32 = v32 * tanh(log(exp(v32) + afp(1.f))); | |||
| v33 = v33 * tanh(log(exp(v33) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| v00 = v00 * clamp(v00 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v01 = v01 * clamp(v01 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v02 = v02 * clamp(v02 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v03 = v03 * clamp(v03 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v10 = v10 * clamp(v10 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v11 = v11 * clamp(v11 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v12 = v12 * clamp(v12 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v13 = v13 * clamp(v13 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v20 = v20 * clamp(v20 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v21 = v21 * clamp(v21 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v22 = v22 * clamp(v22 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v23 = v23 * clamp(v23 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v30 = v30 * clamp(v30 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v31 = v31 * clamp(v31 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v32 = v32 * clamp(v32 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v33 = v33 * clamp(v33 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| v00 = activation_afpvec4(v00, activation_type, activation_param_0, activation_param_1); | |||
| v01 = activation_afpvec4(v01, activation_type, activation_param_0, activation_param_1); | |||
| v02 = activation_afpvec4(v02, activation_type, activation_param_0, activation_param_1); | |||
| v03 = activation_afpvec4(v03, activation_type, activation_param_0, activation_param_1); | |||
| v10 = activation_afpvec4(v10, activation_type, activation_param_0, activation_param_1); | |||
| v11 = activation_afpvec4(v11, activation_type, activation_param_0, activation_param_1); | |||
| v12 = activation_afpvec4(v12, activation_type, activation_param_0, activation_param_1); | |||
| v13 = activation_afpvec4(v13, activation_type, activation_param_0, activation_param_1); | |||
| v20 = activation_afpvec4(v20, activation_type, activation_param_0, activation_param_1); | |||
| v21 = activation_afpvec4(v21, activation_type, activation_param_0, activation_param_1); | |||
| v22 = activation_afpvec4(v22, activation_type, activation_param_0, activation_param_1); | |||
| v23 = activation_afpvec4(v23, activation_type, activation_param_0, activation_param_1); | |||
| v30 = activation_afpvec4(v30, activation_type, activation_param_0, activation_param_1); | |||
| v31 = activation_afpvec4(v31, activation_type, activation_param_0, activation_param_1); | |||
| v32 = activation_afpvec4(v32, activation_type, activation_param_0, activation_param_1); | |||
| v33 = activation_afpvec4(v33, activation_type, activation_param_0, activation_param_1); | |||
| // store 4x4 | |||
| int x = gx * 4; | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| #define LOCAL_MEMORY_UNROLL_INCH 8 | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| @@ -300,53 +303,10 @@ void main() | |||
| return; | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| #extension GL_KHR_memory_scope_semantics: require | |||
| #extension GL_EXT_shader_explicit_arithmetic_types: require | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| @@ -275,53 +278,10 @@ void main() | |||
| afpvec4 sum2 = afpvec4(unpackHalf2x16(sum2_u2.x), unpackHalf2x16(sum2_u2.y)); | |||
| afpvec4 sum3 = afpvec4(unpackHalf2x16(sum3_u2.x), unpackHalf2x16(sum3_u2.y)); | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| int gi = gy * psc(outcstep) + gx + lxd16 * psc(outcstep) + lxm16; | |||
| { | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -195,77 +198,14 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| sum4 = max(sum4, afp(0.f)); | |||
| sum5 = max(sum5, afp(0.f)); | |||
| sum6 = max(sum6, afp(0.f)); | |||
| sum7 = max(sum7, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; | |||
| sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; | |||
| sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; | |||
| sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; | |||
| sum4 = sum4 < afp(0.f) ? sum4 * slope : sum4; | |||
| sum5 = sum5 < afp(0.f) ? sum5 * slope : sum5; | |||
| sum6 = sum6 < afp(0.f) ? sum6 * slope : sum6; | |||
| sum7 = sum7 < afp(0.f) ? sum7 * slope : sum7; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| sum4 = clamp(sum4, const_min, const_max); | |||
| sum5 = clamp(sum5, const_min, const_max); | |||
| sum6 = clamp(sum6, const_min, const_max); | |||
| sum7 = clamp(sum7, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); | |||
| sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); | |||
| sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); | |||
| sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); | |||
| sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); | |||
| sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); | |||
| sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7 = sum7 * clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); | |||
| sum4 = activation_afp(sum4, activation_type, activation_param_0, activation_param_1); | |||
| sum5 = activation_afp(sum5, activation_type, activation_param_0, activation_param_1); | |||
| sum6 = activation_afp(sum6, activation_type, activation_param_0, activation_param_1); | |||
| sum7 = activation_afp(sum7, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| #define LOCAL_MEMORY_UNROLL_INCH 8 | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| @@ -237,53 +240,10 @@ void main() | |||
| #endif | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; | |||
| sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; | |||
| sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; | |||
| sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| #define LOCAL_MEMORY_UNROLL_INCH 8 | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| @@ -280,53 +283,10 @@ void main() | |||
| return; | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; | |||
| sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; | |||
| sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; | |||
| sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -342,125 +345,14 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0[0] = max(sum0[0], afp(0.f)); | |||
| sum0[1] = max(sum0[1], afp(0.f)); | |||
| sum1[0] = max(sum1[0], afp(0.f)); | |||
| sum1[1] = max(sum1[1], afp(0.f)); | |||
| sum2[0] = max(sum2[0], afp(0.f)); | |||
| sum2[1] = max(sum2[1], afp(0.f)); | |||
| sum3[0] = max(sum3[0], afp(0.f)); | |||
| sum3[1] = max(sum3[1], afp(0.f)); | |||
| sum4[0] = max(sum4[0], afp(0.f)); | |||
| sum4[1] = max(sum4[1], afp(0.f)); | |||
| sum5[0] = max(sum5[0], afp(0.f)); | |||
| sum5[1] = max(sum5[1], afp(0.f)); | |||
| sum6[0] = max(sum6[0], afp(0.f)); | |||
| sum6[1] = max(sum6[1], afp(0.f)); | |||
| sum7[0] = max(sum7[0], afp(0.f)); | |||
| sum7[1] = max(sum7[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); | |||
| sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); | |||
| sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); | |||
| sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); | |||
| sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); | |||
| sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); | |||
| sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); | |||
| sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); | |||
| sum4[0] = mix(sum4[0], sum4[0] * afp(slope), lessThan(sum4[0], afpvec4(0.f))); | |||
| sum4[1] = mix(sum4[1], sum4[1] * afp(slope), lessThan(sum4[1], afpvec4(0.f))); | |||
| sum5[0] = mix(sum5[0], sum5[0] * afp(slope), lessThan(sum5[0], afpvec4(0.f))); | |||
| sum5[1] = mix(sum5[1], sum5[1] * afp(slope), lessThan(sum5[1], afpvec4(0.f))); | |||
| sum6[0] = mix(sum6[0], sum6[0] * afp(slope), lessThan(sum6[0], afpvec4(0.f))); | |||
| sum6[1] = mix(sum6[1], sum6[1] * afp(slope), lessThan(sum6[1], afpvec4(0.f))); | |||
| sum7[0] = mix(sum7[0], sum7[0] * afp(slope), lessThan(sum7[0], afpvec4(0.f))); | |||
| sum7[1] = mix(sum7[1], sum7[1] * afp(slope), lessThan(sum7[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0[0] = clamp(sum0[0], const_min, const_max); | |||
| sum0[1] = clamp(sum0[1], const_min, const_max); | |||
| sum1[0] = clamp(sum1[0], const_min, const_max); | |||
| sum1[1] = clamp(sum1[1], const_min, const_max); | |||
| sum2[0] = clamp(sum2[0], const_min, const_max); | |||
| sum2[1] = clamp(sum2[1], const_min, const_max); | |||
| sum3[0] = clamp(sum3[0], const_min, const_max); | |||
| sum3[1] = clamp(sum3[1], const_min, const_max); | |||
| sum4[0] = clamp(sum4[0], const_min, const_max); | |||
| sum4[1] = clamp(sum4[1], const_min, const_max); | |||
| sum5[0] = clamp(sum5[0], const_min, const_max); | |||
| sum5[1] = clamp(sum5[1], const_min, const_max); | |||
| sum6[0] = clamp(sum6[0], const_min, const_max); | |||
| sum6[1] = clamp(sum6[1], const_min, const_max); | |||
| sum7[0] = clamp(sum7[0], const_min, const_max); | |||
| sum7[1] = clamp(sum7[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); | |||
| sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); | |||
| sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); | |||
| sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); | |||
| sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); | |||
| sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); | |||
| sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); | |||
| sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); | |||
| sum4[0] = afp(1.f) / (afp(1.f) + exp(-sum4[0])); | |||
| sum4[1] = afp(1.f) / (afp(1.f) + exp(-sum4[1])); | |||
| sum5[0] = afp(1.f) / (afp(1.f) + exp(-sum5[0])); | |||
| sum5[1] = afp(1.f) / (afp(1.f) + exp(-sum5[1])); | |||
| sum6[0] = afp(1.f) / (afp(1.f) + exp(-sum6[0])); | |||
| sum6[1] = afp(1.f) / (afp(1.f) + exp(-sum6[1])); | |||
| sum7[0] = afp(1.f) / (afp(1.f) + exp(-sum7[0])); | |||
| sum7[1] = afp(1.f) / (afp(1.f) + exp(-sum7[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); | |||
| sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); | |||
| sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); | |||
| sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); | |||
| sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); | |||
| sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); | |||
| sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); | |||
| sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); | |||
| sum4[0] = sum4[0] * tanh(log(exp(sum4[0]) + afp(1.f))); | |||
| sum4[1] = sum4[1] * tanh(log(exp(sum4[1]) + afp(1.f))); | |||
| sum5[0] = sum5[0] * tanh(log(exp(sum5[0]) + afp(1.f))); | |||
| sum5[1] = sum5[1] * tanh(log(exp(sum5[1]) + afp(1.f))); | |||
| sum6[0] = sum6[0] * tanh(log(exp(sum6[0]) + afp(1.f))); | |||
| sum6[1] = sum6[1] * tanh(log(exp(sum6[1]) + afp(1.f))); | |||
| sum7[0] = sum7[0] * tanh(log(exp(sum7[0]) + afp(1.f))); | |||
| sum7[1] = sum7[1] * tanh(log(exp(sum7[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4[0] = sum4[0] * clamp(sum4[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4[1] = sum4[1] * clamp(sum4[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5[0] = sum5[0] * clamp(sum5[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5[1] = sum5[1] * clamp(sum5[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6[0] = sum6[0] * clamp(sum6[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6[1] = sum6[1] * clamp(sum6[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7[0] = sum7[0] * clamp(sum7[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7[1] = sum7[1] * clamp(sum7[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); | |||
| sum4 = activation_afpvec8(sum4, activation_type, activation_param_0, activation_param_1); | |||
| sum5 = activation_afpvec8(sum5, activation_type, activation_param_0, activation_param_1); | |||
| sum6 = activation_afpvec8(sum6, activation_type, activation_param_0, activation_param_1); | |||
| sum7 = activation_afpvec8(sum7, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -222,77 +225,10 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0[0] = max(sum0[0], afp(0.f)); | |||
| sum0[1] = max(sum0[1], afp(0.f)); | |||
| sum1[0] = max(sum1[0], afp(0.f)); | |||
| sum1[1] = max(sum1[1], afp(0.f)); | |||
| sum2[0] = max(sum2[0], afp(0.f)); | |||
| sum2[1] = max(sum2[1], afp(0.f)); | |||
| sum3[0] = max(sum3[0], afp(0.f)); | |||
| sum3[1] = max(sum3[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); | |||
| sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); | |||
| sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); | |||
| sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); | |||
| sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); | |||
| sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); | |||
| sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); | |||
| sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0[0] = clamp(sum0[0], const_min, const_max); | |||
| sum0[1] = clamp(sum0[1], const_min, const_max); | |||
| sum1[0] = clamp(sum1[0], const_min, const_max); | |||
| sum1[1] = clamp(sum1[1], const_min, const_max); | |||
| sum2[0] = clamp(sum2[0], const_min, const_max); | |||
| sum2[1] = clamp(sum2[1], const_min, const_max); | |||
| sum3[0] = clamp(sum3[0], const_min, const_max); | |||
| sum3[1] = clamp(sum3[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); | |||
| sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); | |||
| sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); | |||
| sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); | |||
| sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); | |||
| sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); | |||
| sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); | |||
| sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); | |||
| sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); | |||
| sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); | |||
| sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); | |||
| sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); | |||
| sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); | |||
| sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); | |||
| sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -245,77 +248,10 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0[0] = max(sum0[0], afp(0.f)); | |||
| sum0[1] = max(sum0[1], afp(0.f)); | |||
| sum1[0] = max(sum1[0], afp(0.f)); | |||
| sum1[1] = max(sum1[1], afp(0.f)); | |||
| sum2[0] = max(sum2[0], afp(0.f)); | |||
| sum2[1] = max(sum2[1], afp(0.f)); | |||
| sum3[0] = max(sum3[0], afp(0.f)); | |||
| sum3[1] = max(sum3[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); | |||
| sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); | |||
| sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); | |||
| sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); | |||
| sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); | |||
| sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); | |||
| sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); | |||
| sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0[0] = clamp(sum0[0], const_min, const_max); | |||
| sum0[1] = clamp(sum0[1], const_min, const_max); | |||
| sum1[0] = clamp(sum1[0], const_min, const_max); | |||
| sum1[1] = clamp(sum1[1], const_min, const_max); | |||
| sum2[0] = clamp(sum2[0], const_min, const_max); | |||
| sum2[1] = clamp(sum2[1], const_min, const_max); | |||
| sum3[0] = clamp(sum3[0], const_min, const_max); | |||
| sum3[1] = clamp(sum3[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); | |||
| sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); | |||
| sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); | |||
| sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); | |||
| sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); | |||
| sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); | |||
| sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); | |||
| sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); | |||
| sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); | |||
| sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); | |||
| sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); | |||
| sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); | |||
| sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); | |||
| sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); | |||
| sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -342,125 +345,14 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0[0] = max(sum0[0], afp(0.f)); | |||
| sum0[1] = max(sum0[1], afp(0.f)); | |||
| sum1[0] = max(sum1[0], afp(0.f)); | |||
| sum1[1] = max(sum1[1], afp(0.f)); | |||
| sum2[0] = max(sum2[0], afp(0.f)); | |||
| sum2[1] = max(sum2[1], afp(0.f)); | |||
| sum3[0] = max(sum3[0], afp(0.f)); | |||
| sum3[1] = max(sum3[1], afp(0.f)); | |||
| sum4[0] = max(sum4[0], afp(0.f)); | |||
| sum4[1] = max(sum4[1], afp(0.f)); | |||
| sum5[0] = max(sum5[0], afp(0.f)); | |||
| sum5[1] = max(sum5[1], afp(0.f)); | |||
| sum6[0] = max(sum6[0], afp(0.f)); | |||
| sum6[1] = max(sum6[1], afp(0.f)); | |||
| sum7[0] = max(sum7[0], afp(0.f)); | |||
| sum7[1] = max(sum7[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); | |||
| sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); | |||
| sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); | |||
| sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); | |||
| sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); | |||
| sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); | |||
| sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); | |||
| sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); | |||
| sum4[0] = mix(sum4[0], sum4[0] * afp(slope), lessThan(sum4[0], afpvec4(0.f))); | |||
| sum4[1] = mix(sum4[1], sum4[1] * afp(slope), lessThan(sum4[1], afpvec4(0.f))); | |||
| sum5[0] = mix(sum5[0], sum5[0] * afp(slope), lessThan(sum5[0], afpvec4(0.f))); | |||
| sum5[1] = mix(sum5[1], sum5[1] * afp(slope), lessThan(sum5[1], afpvec4(0.f))); | |||
| sum6[0] = mix(sum6[0], sum6[0] * afp(slope), lessThan(sum6[0], afpvec4(0.f))); | |||
| sum6[1] = mix(sum6[1], sum6[1] * afp(slope), lessThan(sum6[1], afpvec4(0.f))); | |||
| sum7[0] = mix(sum7[0], sum7[0] * afp(slope), lessThan(sum7[0], afpvec4(0.f))); | |||
| sum7[1] = mix(sum7[1], sum7[1] * afp(slope), lessThan(sum7[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0[0] = clamp(sum0[0], const_min, const_max); | |||
| sum0[1] = clamp(sum0[1], const_min, const_max); | |||
| sum1[0] = clamp(sum1[0], const_min, const_max); | |||
| sum1[1] = clamp(sum1[1], const_min, const_max); | |||
| sum2[0] = clamp(sum2[0], const_min, const_max); | |||
| sum2[1] = clamp(sum2[1], const_min, const_max); | |||
| sum3[0] = clamp(sum3[0], const_min, const_max); | |||
| sum3[1] = clamp(sum3[1], const_min, const_max); | |||
| sum4[0] = clamp(sum4[0], const_min, const_max); | |||
| sum4[1] = clamp(sum4[1], const_min, const_max); | |||
| sum5[0] = clamp(sum5[0], const_min, const_max); | |||
| sum5[1] = clamp(sum5[1], const_min, const_max); | |||
| sum6[0] = clamp(sum6[0], const_min, const_max); | |||
| sum6[1] = clamp(sum6[1], const_min, const_max); | |||
| sum7[0] = clamp(sum7[0], const_min, const_max); | |||
| sum7[1] = clamp(sum7[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); | |||
| sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); | |||
| sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); | |||
| sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); | |||
| sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); | |||
| sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); | |||
| sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); | |||
| sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); | |||
| sum4[0] = afp(1.f) / (afp(1.f) + exp(-sum4[0])); | |||
| sum4[1] = afp(1.f) / (afp(1.f) + exp(-sum4[1])); | |||
| sum5[0] = afp(1.f) / (afp(1.f) + exp(-sum5[0])); | |||
| sum5[1] = afp(1.f) / (afp(1.f) + exp(-sum5[1])); | |||
| sum6[0] = afp(1.f) / (afp(1.f) + exp(-sum6[0])); | |||
| sum6[1] = afp(1.f) / (afp(1.f) + exp(-sum6[1])); | |||
| sum7[0] = afp(1.f) / (afp(1.f) + exp(-sum7[0])); | |||
| sum7[1] = afp(1.f) / (afp(1.f) + exp(-sum7[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); | |||
| sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); | |||
| sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); | |||
| sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); | |||
| sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); | |||
| sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); | |||
| sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); | |||
| sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); | |||
| sum4[0] = sum4[0] * tanh(log(exp(sum4[0]) + afp(1.f))); | |||
| sum4[1] = sum4[1] * tanh(log(exp(sum4[1]) + afp(1.f))); | |||
| sum5[0] = sum5[0] * tanh(log(exp(sum5[0]) + afp(1.f))); | |||
| sum5[1] = sum5[1] * tanh(log(exp(sum5[1]) + afp(1.f))); | |||
| sum6[0] = sum6[0] * tanh(log(exp(sum6[0]) + afp(1.f))); | |||
| sum6[1] = sum6[1] * tanh(log(exp(sum6[1]) + afp(1.f))); | |||
| sum7[0] = sum7[0] * tanh(log(exp(sum7[0]) + afp(1.f))); | |||
| sum7[1] = sum7[1] * tanh(log(exp(sum7[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4[0] = sum4[0] * clamp(sum4[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4[1] = sum4[1] * clamp(sum4[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5[0] = sum5[0] * clamp(sum5[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5[1] = sum5[1] * clamp(sum5[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6[0] = sum6[0] * clamp(sum6[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6[1] = sum6[1] * clamp(sum6[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7[0] = sum7[0] * clamp(sum7[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7[1] = sum7[1] * clamp(sum7[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); | |||
| sum4 = activation_afpvec8(sum4, activation_type, activation_param_0, activation_param_1); | |||
| sum5 = activation_afpvec8(sum5, activation_type, activation_param_0, activation_param_1); | |||
| sum6 = activation_afpvec8(sum6, activation_type, activation_param_0, activation_param_1); | |||
| sum7 = activation_afpvec8(sum7, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -222,77 +225,10 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0[0] = max(sum0[0], afp(0.f)); | |||
| sum0[1] = max(sum0[1], afp(0.f)); | |||
| sum1[0] = max(sum1[0], afp(0.f)); | |||
| sum1[1] = max(sum1[1], afp(0.f)); | |||
| sum2[0] = max(sum2[0], afp(0.f)); | |||
| sum2[1] = max(sum2[1], afp(0.f)); | |||
| sum3[0] = max(sum3[0], afp(0.f)); | |||
| sum3[1] = max(sum3[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); | |||
| sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); | |||
| sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); | |||
| sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); | |||
| sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); | |||
| sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); | |||
| sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); | |||
| sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0[0] = clamp(sum0[0], const_min, const_max); | |||
| sum0[1] = clamp(sum0[1], const_min, const_max); | |||
| sum1[0] = clamp(sum1[0], const_min, const_max); | |||
| sum1[1] = clamp(sum1[1], const_min, const_max); | |||
| sum2[0] = clamp(sum2[0], const_min, const_max); | |||
| sum2[1] = clamp(sum2[1], const_min, const_max); | |||
| sum3[0] = clamp(sum3[0], const_min, const_max); | |||
| sum3[1] = clamp(sum3[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); | |||
| sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); | |||
| sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); | |||
| sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); | |||
| sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); | |||
| sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); | |||
| sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); | |||
| sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); | |||
| sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); | |||
| sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); | |||
| sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); | |||
| sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); | |||
| sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); | |||
| sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); | |||
| sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -150,77 +153,10 @@ void main() | |||
| v11 = m11 - m12 + m13; | |||
| } | |||
| if (activation_type == 1) | |||
| { | |||
| v00[0] = max(v00[0], afp(0.f)); | |||
| v00[1] = max(v00[1], afp(0.f)); | |||
| v10[0] = max(v10[0], afp(0.f)); | |||
| v10[1] = max(v10[1], afp(0.f)); | |||
| v01[0] = max(v01[0], afp(0.f)); | |||
| v01[1] = max(v01[1], afp(0.f)); | |||
| v11[0] = max(v11[0], afp(0.f)); | |||
| v11[1] = max(v11[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| v00[0] = mix(v00[0], v00[0] * afp(slope), lessThan(v00[0], afpvec4(0.f))); | |||
| v00[1] = mix(v00[1], v00[1] * afp(slope), lessThan(v00[1], afpvec4(0.f))); | |||
| v10[0] = mix(v10[0], v10[0] * afp(slope), lessThan(v10[0], afpvec4(0.f))); | |||
| v10[1] = mix(v10[1], v10[1] * afp(slope), lessThan(v10[1], afpvec4(0.f))); | |||
| v01[0] = mix(v01[0], v01[0] * afp(slope), lessThan(v01[0], afpvec4(0.f))); | |||
| v01[1] = mix(v01[1], v01[1] * afp(slope), lessThan(v01[1], afpvec4(0.f))); | |||
| v11[0] = mix(v11[0], v11[0] * afp(slope), lessThan(v11[0], afpvec4(0.f))); | |||
| v11[1] = mix(v11[1], v11[1] * afp(slope), lessThan(v11[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| v00[0] = clamp(v00[0], const_min, const_max); | |||
| v00[1] = clamp(v00[1], const_min, const_max); | |||
| v10[0] = clamp(v10[0], const_min, const_max); | |||
| v10[1] = clamp(v10[1], const_min, const_max); | |||
| v01[0] = clamp(v01[0], const_min, const_max); | |||
| v01[1] = clamp(v01[1], const_min, const_max); | |||
| v11[0] = clamp(v11[0], const_min, const_max); | |||
| v11[1] = clamp(v11[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| v00[0] = afp(1.f) / (afp(1.f) + exp(-v00[0])); | |||
| v00[1] = afp(1.f) / (afp(1.f) + exp(-v00[1])); | |||
| v10[0] = afp(1.f) / (afp(1.f) + exp(-v10[0])); | |||
| v10[1] = afp(1.f) / (afp(1.f) + exp(-v10[1])); | |||
| v01[0] = afp(1.f) / (afp(1.f) + exp(-v01[0])); | |||
| v01[1] = afp(1.f) / (afp(1.f) + exp(-v01[1])); | |||
| v11[0] = afp(1.f) / (afp(1.f) + exp(-v11[0])); | |||
| v11[1] = afp(1.f) / (afp(1.f) + exp(-v11[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| v00[0] = v00[0] * tanh(log(exp(v00[0]) + afp(1.f))); | |||
| v00[1] = v00[1] * tanh(log(exp(v00[1]) + afp(1.f))); | |||
| v10[0] = v10[0] * tanh(log(exp(v10[0]) + afp(1.f))); | |||
| v10[1] = v10[1] * tanh(log(exp(v10[1]) + afp(1.f))); | |||
| v01[0] = v01[0] * tanh(log(exp(v01[0]) + afp(1.f))); | |||
| v01[1] = v01[1] * tanh(log(exp(v01[1]) + afp(1.f))); | |||
| v11[0] = v11[0] * tanh(log(exp(v11[0]) + afp(1.f))); | |||
| v11[1] = v11[1] * tanh(log(exp(v11[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| v00[0] = v00[0] * clamp(v00[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v00[1] = v00[1] * clamp(v00[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v01[0] = v01[0] * clamp(v01[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v01[1] = v01[1] * clamp(v01[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v10[0] = v10[0] * clamp(v10[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v10[1] = v10[1] * clamp(v10[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v11[0] = v11[0] * clamp(v11[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v11[1] = v11[1] * clamp(v11[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| v00 = activation_afpvec8(v00, activation_type, activation_param_0, activation_param_1); | |||
| v01 = activation_afpvec8(v01, activation_type, activation_param_0, activation_param_1); | |||
| v10 = activation_afpvec8(v10, activation_type, activation_param_0, activation_param_1); | |||
| v11 = activation_afpvec8(v11, activation_type, activation_param_0, activation_param_1); | |||
| // store 2x2 | |||
| int x = gx * 2; | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -238,221 +241,22 @@ void main() | |||
| v33 = bias_value + v33; | |||
| } | |||
| if (activation_type == 1) | |||
| { | |||
| v00[0] = max(v00[0], afp(0.f)); | |||
| v00[1] = max(v00[1], afp(0.f)); | |||
| v01[0] = max(v01[0], afp(0.f)); | |||
| v01[1] = max(v01[1], afp(0.f)); | |||
| v02[0] = max(v02[0], afp(0.f)); | |||
| v02[1] = max(v02[1], afp(0.f)); | |||
| v03[0] = max(v03[0], afp(0.f)); | |||
| v03[1] = max(v03[1], afp(0.f)); | |||
| v10[0] = max(v10[0], afp(0.f)); | |||
| v10[1] = max(v10[1], afp(0.f)); | |||
| v11[0] = max(v11[0], afp(0.f)); | |||
| v11[1] = max(v11[1], afp(0.f)); | |||
| v12[0] = max(v12[0], afp(0.f)); | |||
| v12[1] = max(v12[1], afp(0.f)); | |||
| v13[0] = max(v13[0], afp(0.f)); | |||
| v13[1] = max(v13[1], afp(0.f)); | |||
| v20[0] = max(v20[0], afp(0.f)); | |||
| v20[1] = max(v20[1], afp(0.f)); | |||
| v21[0] = max(v21[0], afp(0.f)); | |||
| v21[1] = max(v21[1], afp(0.f)); | |||
| v22[0] = max(v22[0], afp(0.f)); | |||
| v22[1] = max(v22[1], afp(0.f)); | |||
| v23[0] = max(v23[0], afp(0.f)); | |||
| v23[1] = max(v23[1], afp(0.f)); | |||
| v30[0] = max(v30[0], afp(0.f)); | |||
| v30[1] = max(v30[1], afp(0.f)); | |||
| v31[0] = max(v31[0], afp(0.f)); | |||
| v31[1] = max(v31[1], afp(0.f)); | |||
| v32[0] = max(v32[0], afp(0.f)); | |||
| v32[1] = max(v32[1], afp(0.f)); | |||
| v33[0] = max(v33[0], afp(0.f)); | |||
| v33[1] = max(v33[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| v00[0] = mix(v00[0], v00[0] * afp(slope), lessThan(v00[0], afpvec4(0.f))); | |||
| v00[1] = mix(v00[1], v00[1] * afp(slope), lessThan(v00[1], afpvec4(0.f))); | |||
| v01[0] = mix(v01[0], v01[0] * afp(slope), lessThan(v01[0], afpvec4(0.f))); | |||
| v01[1] = mix(v01[1], v01[1] * afp(slope), lessThan(v01[1], afpvec4(0.f))); | |||
| v02[0] = mix(v02[0], v02[0] * afp(slope), lessThan(v02[0], afpvec4(0.f))); | |||
| v02[1] = mix(v02[1], v02[1] * afp(slope), lessThan(v02[1], afpvec4(0.f))); | |||
| v03[0] = mix(v03[0], v03[0] * afp(slope), lessThan(v03[0], afpvec4(0.f))); | |||
| v03[1] = mix(v03[1], v03[1] * afp(slope), lessThan(v03[1], afpvec4(0.f))); | |||
| v10[0] = mix(v10[0], v10[0] * afp(slope), lessThan(v10[0], afpvec4(0.f))); | |||
| v10[1] = mix(v10[1], v10[1] * afp(slope), lessThan(v10[1], afpvec4(0.f))); | |||
| v11[0] = mix(v11[0], v11[0] * afp(slope), lessThan(v11[0], afpvec4(0.f))); | |||
| v11[1] = mix(v11[1], v11[1] * afp(slope), lessThan(v11[1], afpvec4(0.f))); | |||
| v12[0] = mix(v12[0], v12[0] * afp(slope), lessThan(v12[0], afpvec4(0.f))); | |||
| v12[1] = mix(v12[1], v12[1] * afp(slope), lessThan(v12[1], afpvec4(0.f))); | |||
| v13[0] = mix(v13[0], v13[0] * afp(slope), lessThan(v13[0], afpvec4(0.f))); | |||
| v13[1] = mix(v13[1], v13[1] * afp(slope), lessThan(v13[1], afpvec4(0.f))); | |||
| v20[0] = mix(v20[0], v20[0] * afp(slope), lessThan(v20[0], afpvec4(0.f))); | |||
| v20[1] = mix(v20[1], v20[1] * afp(slope), lessThan(v20[1], afpvec4(0.f))); | |||
| v21[0] = mix(v21[0], v21[0] * afp(slope), lessThan(v21[0], afpvec4(0.f))); | |||
| v21[1] = mix(v21[1], v21[1] * afp(slope), lessThan(v21[1], afpvec4(0.f))); | |||
| v22[0] = mix(v22[0], v22[0] * afp(slope), lessThan(v22[0], afpvec4(0.f))); | |||
| v22[1] = mix(v22[1], v22[1] * afp(slope), lessThan(v22[1], afpvec4(0.f))); | |||
| v23[0] = mix(v23[0], v23[0] * afp(slope), lessThan(v23[0], afpvec4(0.f))); | |||
| v23[1] = mix(v23[1], v23[1] * afp(slope), lessThan(v23[1], afpvec4(0.f))); | |||
| v30[0] = mix(v30[0], v30[0] * afp(slope), lessThan(v30[0], afpvec4(0.f))); | |||
| v30[1] = mix(v30[1], v30[1] * afp(slope), lessThan(v30[1], afpvec4(0.f))); | |||
| v31[0] = mix(v31[0], v31[0] * afp(slope), lessThan(v31[0], afpvec4(0.f))); | |||
| v31[1] = mix(v31[1], v31[1] * afp(slope), lessThan(v31[1], afpvec4(0.f))); | |||
| v32[0] = mix(v32[0], v32[0] * afp(slope), lessThan(v32[0], afpvec4(0.f))); | |||
| v32[1] = mix(v32[1], v32[1] * afp(slope), lessThan(v32[1], afpvec4(0.f))); | |||
| v33[0] = mix(v33[0], v33[0] * afp(slope), lessThan(v33[0], afpvec4(0.f))); | |||
| v33[1] = mix(v33[1], v33[1] * afp(slope), lessThan(v33[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| v00[0] = clamp(v00[0], const_min, const_max); | |||
| v00[1] = clamp(v00[1], const_min, const_max); | |||
| v01[0] = clamp(v01[0], const_min, const_max); | |||
| v01[1] = clamp(v01[1], const_min, const_max); | |||
| v02[0] = clamp(v02[0], const_min, const_max); | |||
| v02[1] = clamp(v02[1], const_min, const_max); | |||
| v03[0] = clamp(v03[0], const_min, const_max); | |||
| v03[1] = clamp(v03[1], const_min, const_max); | |||
| v10[0] = clamp(v10[0], const_min, const_max); | |||
| v10[1] = clamp(v10[1], const_min, const_max); | |||
| v11[0] = clamp(v11[0], const_min, const_max); | |||
| v11[1] = clamp(v11[1], const_min, const_max); | |||
| v12[0] = clamp(v12[0], const_min, const_max); | |||
| v12[1] = clamp(v12[1], const_min, const_max); | |||
| v13[0] = clamp(v13[0], const_min, const_max); | |||
| v13[1] = clamp(v13[1], const_min, const_max); | |||
| v20[0] = clamp(v20[0], const_min, const_max); | |||
| v20[1] = clamp(v20[1], const_min, const_max); | |||
| v21[0] = clamp(v21[0], const_min, const_max); | |||
| v21[1] = clamp(v21[1], const_min, const_max); | |||
| v22[0] = clamp(v22[0], const_min, const_max); | |||
| v22[1] = clamp(v22[1], const_min, const_max); | |||
| v23[0] = clamp(v23[0], const_min, const_max); | |||
| v23[1] = clamp(v23[1], const_min, const_max); | |||
| v30[0] = clamp(v30[0], const_min, const_max); | |||
| v30[1] = clamp(v30[1], const_min, const_max); | |||
| v31[0] = clamp(v31[0], const_min, const_max); | |||
| v31[1] = clamp(v31[1], const_min, const_max); | |||
| v32[0] = clamp(v32[0], const_min, const_max); | |||
| v32[1] = clamp(v32[1], const_min, const_max); | |||
| v33[0] = clamp(v33[0], const_min, const_max); | |||
| v33[1] = clamp(v33[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| v00[0] = afp(1.f) / (afp(1.f) + exp(-v00[0])); | |||
| v00[1] = afp(1.f) / (afp(1.f) + exp(-v00[1])); | |||
| v01[0] = afp(1.f) / (afp(1.f) + exp(-v01[0])); | |||
| v01[1] = afp(1.f) / (afp(1.f) + exp(-v01[1])); | |||
| v02[0] = afp(1.f) / (afp(1.f) + exp(-v02[0])); | |||
| v02[1] = afp(1.f) / (afp(1.f) + exp(-v02[1])); | |||
| v03[0] = afp(1.f) / (afp(1.f) + exp(-v03[0])); | |||
| v03[1] = afp(1.f) / (afp(1.f) + exp(-v03[1])); | |||
| v10[0] = afp(1.f) / (afp(1.f) + exp(-v10[0])); | |||
| v10[1] = afp(1.f) / (afp(1.f) + exp(-v10[1])); | |||
| v11[0] = afp(1.f) / (afp(1.f) + exp(-v11[0])); | |||
| v11[1] = afp(1.f) / (afp(1.f) + exp(-v11[1])); | |||
| v12[0] = afp(1.f) / (afp(1.f) + exp(-v12[0])); | |||
| v12[1] = afp(1.f) / (afp(1.f) + exp(-v12[1])); | |||
| v13[0] = afp(1.f) / (afp(1.f) + exp(-v13[0])); | |||
| v13[1] = afp(1.f) / (afp(1.f) + exp(-v13[1])); | |||
| v20[0] = afp(1.f) / (afp(1.f) + exp(-v20[0])); | |||
| v20[1] = afp(1.f) / (afp(1.f) + exp(-v20[1])); | |||
| v21[0] = afp(1.f) / (afp(1.f) + exp(-v21[0])); | |||
| v21[1] = afp(1.f) / (afp(1.f) + exp(-v21[1])); | |||
| v22[0] = afp(1.f) / (afp(1.f) + exp(-v22[0])); | |||
| v22[1] = afp(1.f) / (afp(1.f) + exp(-v22[1])); | |||
| v23[0] = afp(1.f) / (afp(1.f) + exp(-v23[0])); | |||
| v23[1] = afp(1.f) / (afp(1.f) + exp(-v23[1])); | |||
| v30[0] = afp(1.f) / (afp(1.f) + exp(-v30[0])); | |||
| v30[1] = afp(1.f) / (afp(1.f) + exp(-v30[1])); | |||
| v31[0] = afp(1.f) / (afp(1.f) + exp(-v31[0])); | |||
| v31[1] = afp(1.f) / (afp(1.f) + exp(-v31[1])); | |||
| v32[0] = afp(1.f) / (afp(1.f) + exp(-v32[0])); | |||
| v32[1] = afp(1.f) / (afp(1.f) + exp(-v32[1])); | |||
| v33[0] = afp(1.f) / (afp(1.f) + exp(-v33[0])); | |||
| v33[1] = afp(1.f) / (afp(1.f) + exp(-v33[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| v00[0] = v00[0] * tanh(log(exp(v00[0]) + afp(1.f))); | |||
| v00[1] = v00[1] * tanh(log(exp(v00[1]) + afp(1.f))); | |||
| v01[0] = v01[0] * tanh(log(exp(v01[0]) + afp(1.f))); | |||
| v01[1] = v01[1] * tanh(log(exp(v01[1]) + afp(1.f))); | |||
| v02[0] = v02[0] * tanh(log(exp(v02[0]) + afp(1.f))); | |||
| v02[1] = v02[1] * tanh(log(exp(v02[1]) + afp(1.f))); | |||
| v03[0] = v03[0] * tanh(log(exp(v03[0]) + afp(1.f))); | |||
| v03[1] = v03[1] * tanh(log(exp(v03[1]) + afp(1.f))); | |||
| v10[0] = v10[0] * tanh(log(exp(v10[0]) + afp(1.f))); | |||
| v10[1] = v10[1] * tanh(log(exp(v10[1]) + afp(1.f))); | |||
| v11[0] = v11[0] * tanh(log(exp(v11[0]) + afp(1.f))); | |||
| v11[1] = v11[1] * tanh(log(exp(v11[1]) + afp(1.f))); | |||
| v12[0] = v12[0] * tanh(log(exp(v12[0]) + afp(1.f))); | |||
| v12[1] = v12[1] * tanh(log(exp(v12[1]) + afp(1.f))); | |||
| v13[0] = v13[0] * tanh(log(exp(v13[0]) + afp(1.f))); | |||
| v13[1] = v13[1] * tanh(log(exp(v13[1]) + afp(1.f))); | |||
| v20[0] = v20[0] * tanh(log(exp(v20[0]) + afp(1.f))); | |||
| v20[1] = v20[1] * tanh(log(exp(v20[1]) + afp(1.f))); | |||
| v21[0] = v21[0] * tanh(log(exp(v21[0]) + afp(1.f))); | |||
| v21[1] = v21[1] * tanh(log(exp(v21[1]) + afp(1.f))); | |||
| v22[0] = v22[0] * tanh(log(exp(v22[0]) + afp(1.f))); | |||
| v22[1] = v22[1] * tanh(log(exp(v22[1]) + afp(1.f))); | |||
| v23[0] = v23[0] * tanh(log(exp(v23[0]) + afp(1.f))); | |||
| v23[1] = v23[1] * tanh(log(exp(v23[1]) + afp(1.f))); | |||
| v30[0] = v30[0] * tanh(log(exp(v30[0]) + afp(1.f))); | |||
| v30[1] = v30[1] * tanh(log(exp(v30[1]) + afp(1.f))); | |||
| v31[0] = v31[0] * tanh(log(exp(v31[0]) + afp(1.f))); | |||
| v31[1] = v31[1] * tanh(log(exp(v31[1]) + afp(1.f))); | |||
| v32[0] = v32[0] * tanh(log(exp(v32[0]) + afp(1.f))); | |||
| v32[1] = v32[1] * tanh(log(exp(v32[1]) + afp(1.f))); | |||
| v33[0] = v33[0] * tanh(log(exp(v33[0]) + afp(1.f))); | |||
| v33[1] = v33[1] * tanh(log(exp(v33[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| v00[0] = v00[0] * clamp(v00[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v00[1] = v00[1] * clamp(v00[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v01[0] = v01[0] * clamp(v01[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v01[1] = v01[1] * clamp(v01[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v02[0] = v02[0] * clamp(v02[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v02[1] = v02[1] * clamp(v02[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v03[0] = v03[0] * clamp(v03[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v03[1] = v03[1] * clamp(v03[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v10[0] = v10[0] * clamp(v10[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v10[1] = v10[1] * clamp(v10[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v11[0] = v11[0] * clamp(v11[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v11[1] = v11[1] * clamp(v11[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v12[0] = v12[0] * clamp(v12[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v12[1] = v12[1] * clamp(v12[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v13[0] = v13[0] * clamp(v13[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v13[1] = v13[1] * clamp(v13[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v20[0] = v20[0] * clamp(v20[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v20[1] = v20[1] * clamp(v20[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v21[0] = v21[0] * clamp(v21[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v21[1] = v21[1] * clamp(v21[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v22[0] = v22[0] * clamp(v22[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v22[1] = v22[1] * clamp(v22[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v23[0] = v23[0] * clamp(v23[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v23[1] = v23[1] * clamp(v23[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v30[0] = v30[0] * clamp(v30[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v30[1] = v30[1] * clamp(v30[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v31[0] = v31[0] * clamp(v31[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v31[1] = v31[1] * clamp(v31[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v32[0] = v32[0] * clamp(v32[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v32[1] = v32[1] * clamp(v32[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v33[0] = v33[0] * clamp(v33[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| v33[1] = v33[1] * clamp(v33[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| v00 = activation_afpvec8(v00, activation_type, activation_param_0, activation_param_1); | |||
| v01 = activation_afpvec8(v01, activation_type, activation_param_0, activation_param_1); | |||
| v02 = activation_afpvec8(v02, activation_type, activation_param_0, activation_param_1); | |||
| v03 = activation_afpvec8(v03, activation_type, activation_param_0, activation_param_1); | |||
| v10 = activation_afpvec8(v10, activation_type, activation_param_0, activation_param_1); | |||
| v11 = activation_afpvec8(v11, activation_type, activation_param_0, activation_param_1); | |||
| v12 = activation_afpvec8(v12, activation_type, activation_param_0, activation_param_1); | |||
| v13 = activation_afpvec8(v13, activation_type, activation_param_0, activation_param_1); | |||
| v20 = activation_afpvec8(v20, activation_type, activation_param_0, activation_param_1); | |||
| v21 = activation_afpvec8(v21, activation_type, activation_param_0, activation_param_1); | |||
| v22 = activation_afpvec8(v22, activation_type, activation_param_0, activation_param_1); | |||
| v23 = activation_afpvec8(v23, activation_type, activation_param_0, activation_param_1); | |||
| v30 = activation_afpvec8(v30, activation_type, activation_param_0, activation_param_1); | |||
| v31 = activation_afpvec8(v31, activation_type, activation_param_0, activation_param_1); | |||
| v32 = activation_afpvec8(v32, activation_type, activation_param_0, activation_param_1); | |||
| v33 = activation_afpvec8(v33, activation_type, activation_param_0, activation_param_1); | |||
| // store 4x4 | |||
| int x = gx * 4; | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -245,77 +248,10 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0[0] = max(sum0[0], afp(0.f)); | |||
| sum0[1] = max(sum0[1], afp(0.f)); | |||
| sum1[0] = max(sum1[0], afp(0.f)); | |||
| sum1[1] = max(sum1[1], afp(0.f)); | |||
| sum2[0] = max(sum2[0], afp(0.f)); | |||
| sum2[1] = max(sum2[1], afp(0.f)); | |||
| sum3[0] = max(sum3[0], afp(0.f)); | |||
| sum3[1] = max(sum3[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0[0] = mix(sum0[0], sum0[0] * afp(slope), lessThan(sum0[0], afpvec4(0.f))); | |||
| sum0[1] = mix(sum0[1], sum0[1] * afp(slope), lessThan(sum0[1], afpvec4(0.f))); | |||
| sum1[0] = mix(sum1[0], sum1[0] * afp(slope), lessThan(sum1[0], afpvec4(0.f))); | |||
| sum1[1] = mix(sum1[1], sum1[1] * afp(slope), lessThan(sum1[1], afpvec4(0.f))); | |||
| sum2[0] = mix(sum2[0], sum2[0] * afp(slope), lessThan(sum2[0], afpvec4(0.f))); | |||
| sum2[1] = mix(sum2[1], sum2[1] * afp(slope), lessThan(sum2[1], afpvec4(0.f))); | |||
| sum3[0] = mix(sum3[0], sum3[0] * afp(slope), lessThan(sum3[0], afpvec4(0.f))); | |||
| sum3[1] = mix(sum3[1], sum3[1] * afp(slope), lessThan(sum3[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0[0] = clamp(sum0[0], const_min, const_max); | |||
| sum0[1] = clamp(sum0[1], const_min, const_max); | |||
| sum1[0] = clamp(sum1[0], const_min, const_max); | |||
| sum1[1] = clamp(sum1[1], const_min, const_max); | |||
| sum2[0] = clamp(sum2[0], const_min, const_max); | |||
| sum2[1] = clamp(sum2[1], const_min, const_max); | |||
| sum3[0] = clamp(sum3[0], const_min, const_max); | |||
| sum3[1] = clamp(sum3[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0[0] = afp(1.f) / (afp(1.f) + exp(-sum0[0])); | |||
| sum0[1] = afp(1.f) / (afp(1.f) + exp(-sum0[1])); | |||
| sum1[0] = afp(1.f) / (afp(1.f) + exp(-sum1[0])); | |||
| sum1[1] = afp(1.f) / (afp(1.f) + exp(-sum1[1])); | |||
| sum2[0] = afp(1.f) / (afp(1.f) + exp(-sum2[0])); | |||
| sum2[1] = afp(1.f) / (afp(1.f) + exp(-sum2[1])); | |||
| sum3[0] = afp(1.f) / (afp(1.f) + exp(-sum3[0])); | |||
| sum3[1] = afp(1.f) / (afp(1.f) + exp(-sum3[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0[0] = sum0[0] * tanh(log(exp(sum0[0]) + afp(1.f))); | |||
| sum0[1] = sum0[1] * tanh(log(exp(sum0[1]) + afp(1.f))); | |||
| sum1[0] = sum1[0] * tanh(log(exp(sum1[0]) + afp(1.f))); | |||
| sum1[1] = sum1[1] * tanh(log(exp(sum1[1]) + afp(1.f))); | |||
| sum2[0] = sum2[0] * tanh(log(exp(sum2[0]) + afp(1.f))); | |||
| sum2[1] = sum2[1] * tanh(log(exp(sum2[1]) + afp(1.f))); | |||
| sum3[0] = sum3[0] * tanh(log(exp(sum3[0]) + afp(1.f))); | |||
| sum3[1] = sum3[1] * tanh(log(exp(sum3[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0[0] = sum0[0] * clamp(sum0[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum0[1] = sum0[1] * clamp(sum0[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[0] = sum1[0] * clamp(sum1[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1[1] = sum1[1] * clamp(sum1[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[0] = sum2[0] * clamp(sum2[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2[1] = sum2[1] * clamp(sum2[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[0] = sum3[0] * clamp(sum3[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3[1] = sum3[1] * clamp(sum3[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec8(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec8(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec8(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec8(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -198,77 +201,14 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| sum4 = max(sum4, afp(0.f)); | |||
| sum5 = max(sum5, afp(0.f)); | |||
| sum6 = max(sum6, afp(0.f)); | |||
| sum7 = max(sum7, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; | |||
| sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; | |||
| sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; | |||
| sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; | |||
| sum4 = sum4 < afp(0.f) ? sum4 * slope : sum4; | |||
| sum5 = sum5 < afp(0.f) ? sum5 * slope : sum5; | |||
| sum6 = sum6 < afp(0.f) ? sum6 * slope : sum6; | |||
| sum7 = sum7 < afp(0.f) ? sum7 * slope : sum7; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| sum4 = clamp(sum4, const_min, const_max); | |||
| sum5 = clamp(sum5, const_min, const_max); | |||
| sum6 = clamp(sum6, const_min, const_max); | |||
| sum7 = clamp(sum7, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); | |||
| sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); | |||
| sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); | |||
| sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); | |||
| sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); | |||
| sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); | |||
| sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7 = sum7 * clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); | |||
| sum4 = activation_afp(sum4, activation_type, activation_param_0, activation_param_1); | |||
| sum5 = activation_afp(sum5, activation_type, activation_param_0, activation_param_1); | |||
| sum6 = activation_afp(sum6, activation_type, activation_param_0, activation_param_1); | |||
| sum7 = activation_afp(sum7, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -146,53 +149,10 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; | |||
| sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; | |||
| sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; | |||
| sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -169,53 +172,10 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = sum0 < afp(0.f) ? sum0 * slope : sum0; | |||
| sum1 = sum1 < afp(0.f) ? sum1 * slope : sum1; | |||
| sum2 = sum2 < afp(0.f) ? sum2 * slope : sum2; | |||
| sum3 = sum3 < afp(0.f) ? sum3 * slope : sum3; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afp(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afp(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afp(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afp(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -258,77 +261,14 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| sum4 = max(sum4, afp(0.f)); | |||
| sum5 = max(sum5, afp(0.f)); | |||
| sum6 = max(sum6, afp(0.f)); | |||
| sum7 = max(sum7, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| sum4 = mix(sum4, sum4 * afp(slope), lessThan(sum4, afpvec4(0.f))); | |||
| sum5 = mix(sum5, sum5 * afp(slope), lessThan(sum5, afpvec4(0.f))); | |||
| sum6 = mix(sum6, sum6 * afp(slope), lessThan(sum6, afpvec4(0.f))); | |||
| sum7 = mix(sum7, sum7 * afp(slope), lessThan(sum7, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| sum4 = clamp(sum4, const_min, const_max); | |||
| sum5 = clamp(sum5, const_min, const_max); | |||
| sum6 = clamp(sum6, const_min, const_max); | |||
| sum7 = clamp(sum7, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| sum4 = afp(1.f) / (afp(1.f) + exp(-sum4)); | |||
| sum5 = afp(1.f) / (afp(1.f) + exp(-sum5)); | |||
| sum6 = afp(1.f) / (afp(1.f) + exp(-sum6)); | |||
| sum7 = afp(1.f) / (afp(1.f) + exp(-sum7)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| sum4 = sum4 * tanh(log(exp(sum4) + afp(1.f))); | |||
| sum5 = sum5 * tanh(log(exp(sum5) + afp(1.f))); | |||
| sum6 = sum6 * tanh(log(exp(sum6) + afp(1.f))); | |||
| sum7 = sum7 * tanh(log(exp(sum7) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum4 = sum4 * clamp(sum4 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum5 = sum5 * clamp(sum5 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum6 = sum6 * clamp(sum6 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum7 = sum7 * clamp(sum7 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| sum4 = activation_afpvec4(sum4, activation_type, activation_param_0, activation_param_1); | |||
| sum5 = activation_afpvec4(sum5, activation_type, activation_param_0, activation_param_1); | |||
| sum6 = activation_afpvec4(sum6, activation_type, activation_param_0, activation_param_1); | |||
| sum7 = activation_afpvec4(sum7, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -182,53 +185,10 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -205,53 +208,10 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum0 = max(sum0, afp(0.f)); | |||
| sum1 = max(sum1, afp(0.f)); | |||
| sum2 = max(sum2, afp(0.f)); | |||
| sum3 = max(sum3, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum0 = mix(sum0, sum0 * afp(slope), lessThan(sum0, afpvec4(0.f))); | |||
| sum1 = mix(sum1, sum1 * afp(slope), lessThan(sum1, afpvec4(0.f))); | |||
| sum2 = mix(sum2, sum2 * afp(slope), lessThan(sum2, afpvec4(0.f))); | |||
| sum3 = mix(sum3, sum3 * afp(slope), lessThan(sum3, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum0 = clamp(sum0, const_min, const_max); | |||
| sum1 = clamp(sum1, const_min, const_max); | |||
| sum2 = clamp(sum2, const_min, const_max); | |||
| sum3 = clamp(sum3, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum0 = afp(1.f) / (afp(1.f) + exp(-sum0)); | |||
| sum1 = afp(1.f) / (afp(1.f) + exp(-sum1)); | |||
| sum2 = afp(1.f) / (afp(1.f) + exp(-sum2)); | |||
| sum3 = afp(1.f) / (afp(1.f) + exp(-sum3)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum0 = sum0 * tanh(log(exp(sum0) + afp(1.f))); | |||
| sum1 = sum1 * tanh(log(exp(sum1) + afp(1.f))); | |||
| sum2 = sum2 * tanh(log(exp(sum2) + afp(1.f))); | |||
| sum3 = sum3 * tanh(log(exp(sum3) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum0 = sum0 * clamp(sum0 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum1 = sum1 * clamp(sum1 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum2 = sum2 * clamp(sum2 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum3 = sum3 * clamp(sum3 * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum0 = activation_afpvec4(sum0, activation_type, activation_param_0, activation_param_1); | |||
| sum1 = activation_afpvec4(sum1, activation_type, activation_param_0, activation_param_1); | |||
| sum2 = activation_afpvec4(sum2, activation_type, activation_param_0, activation_param_1); | |||
| sum3 = activation_afpvec4(sum3, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(sx4.r, sy4.r, gy), sum0); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -132,35 +135,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -152,35 +155,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -160,35 +163,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -165,41 +168,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -180,35 +183,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -160,35 +163,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -191,41 +194,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -191,41 +194,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -163,35 +166,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -175,35 +178,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -140,35 +143,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -145,41 +148,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -165,25 +168,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -120,25 +123,7 @@ void main() | |||
| } | |||
| } | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -173,25 +176,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -178,29 +181,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -193,25 +196,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -120,25 +123,7 @@ void main() | |||
| } | |||
| } | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -173,25 +176,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -204,29 +207,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -204,29 +207,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -121,29 +124,7 @@ void main() | |||
| } | |||
| } | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -176,25 +179,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -188,25 +191,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -159,25 +162,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -177,25 +180,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -185,25 +188,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -190,29 +193,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -205,25 +208,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -185,25 +188,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -216,29 +219,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -216,29 +219,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -188,25 +191,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -200,25 +203,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -167,25 +170,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int kernel_w = 1; | |||
| layout (constant_id = 1) const int kernel_h = 1; | |||
| layout (constant_id = 2) const int dilation_w = 1; | |||
| @@ -172,29 +175,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, gy, gz), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -104,35 +107,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -105,35 +108,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, 0), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -113,35 +116,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx * 4 + 0, gy, 0), sum.r); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -118,41 +121,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx * 8 + 0, gy, 0), sum[0].r); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -141,35 +144,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx * 4 + 0, gy, 0), sum.r); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -121,35 +124,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, 0), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -152,41 +155,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx * 8 + 0, gy, 0), sum[0].r); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -160,41 +163,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx * 8 + 0, gy, 0), sum[0].r); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -132,35 +135,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, gy, 0), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -144,35 +147,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx * 4 + 0, gy, 0), sum.r); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -112,35 +115,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -119,41 +122,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -135,35 +138,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -112,35 +115,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -147,41 +150,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -147,41 +150,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -115,35 +118,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -131,35 +134,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -88,35 +91,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = sum < afp(0.f) ? sum * slope : sum; | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afp(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -21,6 +21,9 @@ | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -88,35 +91,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum = max(sum, afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum = mix(sum, sum * afp(slope), lessThan(sum, afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum = clamp(sum, const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum = afp(1.f) / (afp(1.f) + exp(-sum)); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum = sum * tanh(log(exp(sum) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum = sum * clamp(sum * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec4(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -22,6 +22,9 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #extension GL_EXT_shader_explicit_arithmetic_types_float16: require | |||
| #endif | |||
| #extension GL_GOOGLE_include_directive: enable | |||
| #include "vulkan_activation.comp" | |||
| layout (constant_id = 0) const int bias_term = 0; | |||
| layout (constant_id = 1) const int activation_type = 0; | |||
| layout (constant_id = 2) const float activation_param_0 = 0; | |||
| @@ -89,41 +92,7 @@ void main() | |||
| } | |||
| #endif | |||
| if (activation_type == 1) | |||
| { | |||
| sum[0] = max(sum[0], afp(0.f)); | |||
| sum[1] = max(sum[1], afp(0.f)); | |||
| } | |||
| if (activation_type == 2) | |||
| { | |||
| const afp slope = afp(activation_param_0); | |||
| sum[0] = mix(sum[0], sum[0] * afp(slope), lessThan(sum[0], afpvec4(0.f))); | |||
| sum[1] = mix(sum[1], sum[1] * afp(slope), lessThan(sum[1], afpvec4(0.f))); | |||
| } | |||
| if (activation_type == 3) | |||
| { | |||
| const afp const_min = afp(activation_param_0); | |||
| const afp const_max = afp(activation_param_1); | |||
| sum[0] = clamp(sum[0], const_min, const_max); | |||
| sum[1] = clamp(sum[1], const_min, const_max); | |||
| } | |||
| if (activation_type == 4) | |||
| { | |||
| sum[0] = afp(1.f) / (afp(1.f) + exp(-sum[0])); | |||
| sum[1] = afp(1.f) / (afp(1.f) + exp(-sum[1])); | |||
| } | |||
| if (activation_type == 5) | |||
| { | |||
| sum[0] = sum[0] * tanh(log(exp(sum[0]) + afp(1.f))); | |||
| sum[1] = sum[1] * tanh(log(exp(sum[1]) + afp(1.f))); | |||
| } | |||
| if (activation_type == 6) | |||
| { | |||
| const afp alpha = afp(activation_param_0); | |||
| const afp beta = afp(activation_param_1); | |||
| sum[0] = sum[0] * clamp(sum[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| sum[1] = sum[1] * clamp(sum[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| } | |||
| sum = activation_afpvec8(sum, activation_type, activation_param_0, activation_param_1); | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob, ivec3(gx, 0, 0), sum); | |||
| @@ -61,7 +61,11 @@ void main() | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| #endif | |||
| #if NCNN_moltenvk | |||
| v = v * afp(tanh(float(log(exp(v) + afp(1.f))))); | |||
| #else | |||
| v = v * tanh(log(exp(v) + afp(1.f))); | |||
| #endif | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| @@ -61,7 +61,11 @@ void main() | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| #endif | |||
| #if NCNN_moltenvk | |||
| v = v * afpvec4(tanh(vec4(log(exp(v) + afpvec4(1.f))))); | |||
| #else | |||
| v = v * tanh(log(exp(v) + afpvec4(1.f))); | |||
| #endif | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| @@ -62,8 +62,13 @@ void main() | |||
| afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); | |||
| #endif | |||
| #if NCNN_moltenvk | |||
| v[0] = v[0] * afpvec4(tanh(vec4(log(exp(v[0]) + afpvec4(1.f))))); | |||
| v[1] = v[1] * afpvec4(tanh(vec4(log(exp(v[1]) + afpvec4(1.f))))); | |||
| #else | |||
| v[0] = v[0] * tanh(log(exp(v[0]) + afpvec4(1.f))); | |||
| v[1] = v[1] * tanh(log(exp(v[1]) + afpvec4(1.f))); | |||
| #endif | |||
| #if NCNN_image_shader | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| @@ -61,7 +61,11 @@ void main() | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| #endif | |||
| #if NCNN_moltenvk | |||
| v = afp(tanh(float(v))); | |||
| #else | |||
| v = tanh(v); | |||
| #endif | |||
| #if NCNN_image_shader | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| @@ -61,7 +61,11 @@ void main() | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| #endif | |||
| #if NCNN_moltenvk | |||
| v = afpvec4(tanh(vec4(v))); | |||
| #else | |||
| v = tanh(v); | |||
| #endif | |||
| #if NCNN_image_shader | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||