From e2d93a482e8535cd258c178e91c1f2884e8e30b7 Mon Sep 17 00:00:00 2001 From: Yexuan Wu <56149058+futz12@users.noreply.github.com> Date: Sat, 12 Jul 2025 17:09:38 +0800 Subject: [PATCH] Unified elempack activation function vulkan shader (#6175) --- src/layer/vulkan/celu_vulkan.cpp | 88 +++--------------- src/layer/vulkan/celu_vulkan.h | 2 - src/layer/vulkan/clip_vulkan.cpp | 88 +++--------------- src/layer/vulkan/clip_vulkan.h | 2 - src/layer/vulkan/dropout_vulkan.cpp | 82 +++-------------- src/layer/vulkan/dropout_vulkan.h | 2 - src/layer/vulkan/elu_vulkan.cpp | 88 +++--------------- src/layer/vulkan/elu_vulkan.h | 2 - src/layer/vulkan/erf_vulkan.cpp | 82 +++-------------- src/layer/vulkan/erf_vulkan.h | 2 - src/layer/vulkan/gelu_vulkan.cpp | 88 +++--------------- src/layer/vulkan/gelu_vulkan.h | 2 - src/layer/vulkan/hardsigmoid_vulkan.cpp | 82 +++-------------- src/layer/vulkan/hardsigmoid_vulkan.h | 2 - src/layer/vulkan/hardswish_vulkan.cpp | 82 +++-------------- src/layer/vulkan/hardswish_vulkan.h | 2 - src/layer/vulkan/mish_vulkan.cpp | 88 +++--------------- src/layer/vulkan/mish_vulkan.h | 2 - src/layer/vulkan/relu_vulkan.cpp | 88 +++--------------- src/layer/vulkan/relu_vulkan.h | 2 - src/layer/vulkan/shader/celu.comp | 28 ++---- src/layer/vulkan/shader/celu_pack4.comp | 42 --------- src/layer/vulkan/shader/celu_pack8.comp | 43 --------- src/layer/vulkan/shader/clip.comp | 30 ++----- src/layer/vulkan/shader/clip_pack4.comp | 43 --------- src/layer/vulkan/shader/clip_pack8.comp | 44 --------- src/layer/vulkan/shader/dropout.comp | 29 ++---- src/layer/vulkan/shader/dropout_pack4.comp | 42 --------- src/layer/vulkan/shader/dropout_pack8.comp | 43 --------- src/layer/vulkan/shader/elu.comp | 28 ++---- src/layer/vulkan/shader/elu_pack4.comp | 42 --------- src/layer/vulkan/shader/elu_pack8.comp | 43 --------- src/layer/vulkan/shader/erf.comp | 48 ++++------ src/layer/vulkan/shader/erf_pack4.comp | 55 ------------ src/layer/vulkan/shader/erf_pack8.comp | 56 ------------ src/layer/vulkan/shader/gelu.comp | 29 ++---- src/layer/vulkan/shader/gelu_pack4.comp | 46 ---------- src/layer/vulkan/shader/gelu_pack8.comp | 48 ---------- src/layer/vulkan/shader/hardsigmoid.comp | 29 ++---- .../vulkan/shader/hardsigmoid_pack4.comp | 43 --------- .../vulkan/shader/hardsigmoid_pack8.comp | 44 --------- src/layer/vulkan/shader/hardswish.comp | 28 ++---- src/layer/vulkan/shader/hardswish_pack4.comp | 43 --------- src/layer/vulkan/shader/hardswish_pack8.comp | 44 --------- src/layer/vulkan/shader/mish.comp | 34 +++---- src/layer/vulkan/shader/mish_pack4.comp | 44 --------- src/layer/vulkan/shader/mish_pack8.comp | 46 ---------- src/layer/vulkan/shader/relu.comp | 31 +++---- src/layer/vulkan/shader/relu_pack4.comp | 45 ---------- src/layer/vulkan/shader/relu_pack8.comp | 51 ----------- src/layer/vulkan/shader/swish.comp | 31 ++----- src/layer/vulkan/shader/swish_pack4.comp | 40 --------- src/layer/vulkan/shader/swish_pack8.comp | 41 --------- src/layer/vulkan/shader/tanh.comp | 30 ++----- src/layer/vulkan/shader/tanh_pack4.comp | 44 --------- src/layer/vulkan/shader/tanh_pack8.comp | 46 ---------- src/layer/vulkan/swish_vulkan.cpp | 89 +++---------------- src/layer/vulkan/swish_vulkan.h | 2 - src/layer/vulkan/tanh_vulkan.cpp | 88 +++--------------- src/layer/vulkan/tanh_vulkan.h | 2 - 60 files changed, 285 insertions(+), 2225 deletions(-) delete mode 100644 src/layer/vulkan/shader/celu_pack4.comp delete mode 100644 src/layer/vulkan/shader/celu_pack8.comp delete mode 100644 src/layer/vulkan/shader/clip_pack4.comp delete mode 100644 src/layer/vulkan/shader/clip_pack8.comp delete mode 100644 src/layer/vulkan/shader/dropout_pack4.comp delete mode 100644 src/layer/vulkan/shader/dropout_pack8.comp delete mode 100644 src/layer/vulkan/shader/elu_pack4.comp delete mode 100644 src/layer/vulkan/shader/elu_pack8.comp delete mode 100644 src/layer/vulkan/shader/erf_pack4.comp delete mode 100644 src/layer/vulkan/shader/erf_pack8.comp delete mode 100644 src/layer/vulkan/shader/gelu_pack4.comp delete mode 100644 src/layer/vulkan/shader/gelu_pack8.comp delete mode 100644 src/layer/vulkan/shader/hardsigmoid_pack4.comp delete mode 100644 src/layer/vulkan/shader/hardsigmoid_pack8.comp delete mode 100644 src/layer/vulkan/shader/hardswish_pack4.comp delete mode 100644 src/layer/vulkan/shader/hardswish_pack8.comp delete mode 100644 src/layer/vulkan/shader/mish_pack4.comp delete mode 100644 src/layer/vulkan/shader/mish_pack8.comp delete mode 100644 src/layer/vulkan/shader/relu_pack4.comp delete mode 100644 src/layer/vulkan/shader/relu_pack8.comp delete mode 100644 src/layer/vulkan/shader/swish_pack4.comp delete mode 100644 src/layer/vulkan/shader/swish_pack8.comp delete mode 100644 src/layer/vulkan/shader/tanh_pack4.comp delete mode 100644 src/layer/vulkan/shader/tanh_pack8.comp diff --git a/src/layer/vulkan/celu_vulkan.cpp b/src/layer/vulkan/celu_vulkan.cpp index 448ebb6b8..176b56c8a 100644 --- a/src/layer/vulkan/celu_vulkan.cpp +++ b/src/layer/vulkan/celu_vulkan.cpp @@ -12,8 +12,6 @@ CELU_vulkan::CELU_vulkan() support_vulkan = true; pipeline_celu = 0; - pipeline_celu_pack4 = 0; - pipeline_celu_pack8 = 0; } int CELU_vulkan::create_pipeline(const Option& opt) @@ -41,63 +39,15 @@ int CELU_vulkan::create_pipeline(const Option& opt) if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(1 + 5); + std::vector specializations(1 + 1); specializations[0].f = alpha; - specializations[1 + 0].i = shape_packed.dims; - specializations[1 + 1].i = shape_packed.w; - specializations[1 + 2].i = shape_packed.h * shape_packed.d; - specializations[1 + 3].i = shape_packed.c; - specializations[1 + 4].i = shape_packed.cstep; - - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - if (shape_packed.dims == 4) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); - local_size_xyz.c = std::min(4, shape_packed.c); - } - - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_celu = new Pipeline(vkdev); - pipeline_celu->set_optimal_local_size_xyz(local_size_xyz); - pipeline_celu->create(LayerShaderType::celu, opt, specializations); - } + specializations[1 + 0].u32 = shape_packed.total() * elempack / 4; - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_celu_pack4 = new Pipeline(vkdev); - pipeline_celu_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_celu_pack4->create(LayerShaderType::celu_pack4, opt, specializations); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_celu_pack8 = new Pipeline(vkdev); - pipeline_celu_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_celu_pack8->create(LayerShaderType::celu_pack8, opt, specializations); - } + pipeline_celu = new Pipeline(vkdev); + pipeline_celu->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_celu->create(LayerShaderType::celu, opt, specializations); return 0; } @@ -107,34 +57,24 @@ int CELU_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_celu; pipeline_celu = 0; - delete pipeline_celu_pack4; - pipeline_celu_pack4 = 0; - - delete pipeline_celu_pack8; - pipeline_celu_pack8 = 0; - return 0; } int CELU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h * bottom_top_blob.d; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_celu_pack8 - : elempack == 4 ? pipeline_celu_pack4 - : pipeline_celu; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_celu, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/celu_vulkan.h b/src/layer/vulkan/celu_vulkan.h index 2d2a3080f..f8a51271b 100644 --- a/src/layer/vulkan/celu_vulkan.h +++ b/src/layer/vulkan/celu_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_celu; - Pipeline* pipeline_celu_pack4; - Pipeline* pipeline_celu_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/clip_vulkan.cpp b/src/layer/vulkan/clip_vulkan.cpp index 90045d309..445896e23 100644 --- a/src/layer/vulkan/clip_vulkan.cpp +++ b/src/layer/vulkan/clip_vulkan.cpp @@ -12,8 +12,6 @@ Clip_vulkan::Clip_vulkan() support_vulkan = true; pipeline_clip = 0; - pipeline_clip_pack4 = 0; - pipeline_clip_pack8 = 0; } int Clip_vulkan::create_pipeline(const Option& opt) @@ -41,64 +39,16 @@ int Clip_vulkan::create_pipeline(const Option& opt) if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(2 + 5); + std::vector specializations(2 + 1); specializations[0].f = min; specializations[1].f = max; - specializations[2 + 0].i = shape_packed.dims; - specializations[2 + 1].i = shape_packed.w; - specializations[2 + 2].i = shape_packed.h * shape_packed.d; - specializations[2 + 3].i = shape_packed.c; - specializations[2 + 4].i = shape_packed.cstep; - - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - if (shape_packed.dims == 4) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); - local_size_xyz.c = std::min(4, shape_packed.c); - } - - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_clip = new Pipeline(vkdev); - pipeline_clip->set_optimal_local_size_xyz(local_size_xyz); - pipeline_clip->create(LayerShaderType::clip, opt, specializations); - } + specializations[2 + 0].u32 = shape_packed.total() * elempack / 4; - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_clip_pack4 = new Pipeline(vkdev); - pipeline_clip_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_clip_pack4->create(LayerShaderType::clip_pack4, opt, specializations); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_clip_pack8 = new Pipeline(vkdev); - pipeline_clip_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_clip_pack8->create(LayerShaderType::clip_pack8, opt, specializations); - } + pipeline_clip = new Pipeline(vkdev); + pipeline_clip->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_clip->create(LayerShaderType::clip, opt, specializations); return 0; } @@ -108,34 +58,24 @@ int Clip_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_clip; pipeline_clip = 0; - delete pipeline_clip_pack4; - pipeline_clip_pack4 = 0; - - delete pipeline_clip_pack8; - pipeline_clip_pack8 = 0; - return 0; } int Clip_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h * bottom_top_blob.d; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_clip_pack8 - : elempack == 4 ? pipeline_clip_pack4 - : pipeline_clip; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_clip, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/clip_vulkan.h b/src/layer/vulkan/clip_vulkan.h index 0bac286dc..de513d848 100644 --- a/src/layer/vulkan/clip_vulkan.h +++ b/src/layer/vulkan/clip_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_clip; - Pipeline* pipeline_clip_pack4; - Pipeline* pipeline_clip_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/dropout_vulkan.cpp b/src/layer/vulkan/dropout_vulkan.cpp index b0a842f3b..5911fe992 100644 --- a/src/layer/vulkan/dropout_vulkan.cpp +++ b/src/layer/vulkan/dropout_vulkan.cpp @@ -12,8 +12,6 @@ Dropout_vulkan::Dropout_vulkan() support_vulkan = true; pipeline_dropout = 0; - pipeline_dropout_pack4 = 0; - pipeline_dropout_pack8 = 0; } int Dropout_vulkan::create_pipeline(const Option& opt) @@ -40,57 +38,15 @@ int Dropout_vulkan::create_pipeline(const Option& opt) if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack); if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(1 + 5); + std::vector specializations(1 + 1); specializations[0].f = scale; - specializations[1 + 0].i = shape_packed.dims; - specializations[1 + 1].i = shape_packed.w; - specializations[1 + 2].i = shape_packed.h; - specializations[1 + 3].i = shape_packed.c; - specializations[1 + 4].i = shape_packed.cstep; - - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_dropout = new Pipeline(vkdev); - pipeline_dropout->set_optimal_local_size_xyz(local_size_xyz); - pipeline_dropout->create(LayerShaderType::dropout, opt, specializations); - } + specializations[1 + 0].u32 = shape_packed.total() * elempack / 4; - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_dropout_pack4 = new Pipeline(vkdev); - pipeline_dropout_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_dropout_pack4->create(LayerShaderType::dropout_pack4, opt, specializations); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_dropout_pack8 = new Pipeline(vkdev); - pipeline_dropout_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_dropout_pack8->create(LayerShaderType::dropout_pack8, opt, specializations); - } + pipeline_dropout = new Pipeline(vkdev); + pipeline_dropout->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_dropout->create(LayerShaderType::dropout, opt, specializations); return 0; } @@ -100,12 +56,6 @@ int Dropout_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_dropout; pipeline_dropout = 0; - delete pipeline_dropout_pack4; - pipeline_dropout_pack4 = 0; - - delete pipeline_dropout_pack8; - pipeline_dropout_pack8 = 0; - return 0; } @@ -116,23 +66,19 @@ int Dropout_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons return 0; } - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_dropout_pack8 - : elempack == 4 ? pipeline_dropout_pack4 - : pipeline_dropout; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_dropout, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/dropout_vulkan.h b/src/layer/vulkan/dropout_vulkan.h index 503bc484a..eb82601bd 100644 --- a/src/layer/vulkan/dropout_vulkan.h +++ b/src/layer/vulkan/dropout_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_dropout; - Pipeline* pipeline_dropout_pack4; - Pipeline* pipeline_dropout_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/elu_vulkan.cpp b/src/layer/vulkan/elu_vulkan.cpp index 749fc1a6b..a5e0007a4 100644 --- a/src/layer/vulkan/elu_vulkan.cpp +++ b/src/layer/vulkan/elu_vulkan.cpp @@ -12,8 +12,6 @@ ELU_vulkan::ELU_vulkan() support_vulkan = true; pipeline_elu = 0; - pipeline_elu_pack4 = 0; - pipeline_elu_pack8 = 0; } int ELU_vulkan::create_pipeline(const Option& opt) @@ -41,63 +39,15 @@ int ELU_vulkan::create_pipeline(const Option& opt) if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(1 + 5); + std::vector specializations(1 + 1); specializations[0].f = alpha; - specializations[1 + 0].i = shape_packed.dims; - specializations[1 + 1].i = shape_packed.w; - specializations[1 + 2].i = shape_packed.h * shape_packed.d; - specializations[1 + 3].i = shape_packed.c; - specializations[1 + 4].i = shape_packed.cstep; - - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - if (shape_packed.dims == 4) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); - local_size_xyz.c = std::min(4, shape_packed.c); - } - - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_elu = new Pipeline(vkdev); - pipeline_elu->set_optimal_local_size_xyz(local_size_xyz); - pipeline_elu->create(LayerShaderType::elu, opt, specializations); - } + specializations[1 + 0].u32 = shape_packed.total() * elempack / 4; - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_elu_pack4 = new Pipeline(vkdev); - pipeline_elu_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_elu_pack4->create(LayerShaderType::elu_pack4, opt, specializations); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_elu_pack8 = new Pipeline(vkdev); - pipeline_elu_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_elu_pack8->create(LayerShaderType::elu_pack8, opt, specializations); - } + pipeline_elu = new Pipeline(vkdev); + pipeline_elu->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_elu->create(LayerShaderType::elu, opt, specializations); return 0; } @@ -107,34 +57,24 @@ int ELU_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_elu; pipeline_elu = 0; - delete pipeline_elu_pack4; - pipeline_elu_pack4 = 0; - - delete pipeline_elu_pack8; - pipeline_elu_pack8 = 0; - return 0; } int ELU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h * bottom_top_blob.d; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_elu_pack8 - : elempack == 4 ? pipeline_elu_pack4 - : pipeline_elu; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_elu, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/elu_vulkan.h b/src/layer/vulkan/elu_vulkan.h index f26d7700e..edf046ad1 100644 --- a/src/layer/vulkan/elu_vulkan.h +++ b/src/layer/vulkan/elu_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_elu; - Pipeline* pipeline_elu_pack4; - Pipeline* pipeline_elu_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/erf_vulkan.cpp b/src/layer/vulkan/erf_vulkan.cpp index a726b59db..1321dcad4 100644 --- a/src/layer/vulkan/erf_vulkan.cpp +++ b/src/layer/vulkan/erf_vulkan.cpp @@ -12,8 +12,6 @@ Erf_vulkan::Erf_vulkan() support_vulkan = true; pipeline_erf = 0; - pipeline_erf_pack4 = 0; - pipeline_erf_pack8 = 0; } int Erf_vulkan::create_pipeline(const Option& opt) @@ -40,56 +38,14 @@ int Erf_vulkan::create_pipeline(const Option& opt) if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack); if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(0 + 5); - specializations[0 + 0].i = shape_packed.dims; - specializations[0 + 1].i = shape_packed.w; - specializations[0 + 2].i = shape_packed.h; - specializations[0 + 3].i = shape_packed.c; - specializations[0 + 4].i = shape_packed.cstep; + std::vector specializations(1); + specializations[0].u32 = shape_packed.total() * elempack / 4; - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_erf = new Pipeline(vkdev); - pipeline_erf->set_optimal_local_size_xyz(local_size_xyz); - pipeline_erf->create(LayerShaderType::erf, opt, specializations); - } - - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_erf_pack4 = new Pipeline(vkdev); - pipeline_erf_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_erf_pack4->create(LayerShaderType::erf_pack4, opt, specializations); - } - - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_erf_pack8 = new Pipeline(vkdev); - pipeline_erf_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_erf_pack8->create(LayerShaderType::erf_pack8, opt, specializations); - } + pipeline_erf = new Pipeline(vkdev); + pipeline_erf->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_erf->create(LayerShaderType::erf, opt, specializations); return 0; } @@ -99,34 +55,24 @@ int Erf_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_erf; pipeline_erf = 0; - delete pipeline_erf_pack4; - pipeline_erf_pack4 = 0; - - delete pipeline_erf_pack8; - pipeline_erf_pack8 = 0; - return 0; } int Erf_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_erf_pack8 - : elempack == 4 ? pipeline_erf_pack4 - : pipeline_erf; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_erf, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/erf_vulkan.h b/src/layer/vulkan/erf_vulkan.h index 30f0f8798..de20f1b76 100644 --- a/src/layer/vulkan/erf_vulkan.h +++ b/src/layer/vulkan/erf_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_erf; - Pipeline* pipeline_erf_pack4; - Pipeline* pipeline_erf_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/gelu_vulkan.cpp b/src/layer/vulkan/gelu_vulkan.cpp index f5be26f17..9823ff849 100644 --- a/src/layer/vulkan/gelu_vulkan.cpp +++ b/src/layer/vulkan/gelu_vulkan.cpp @@ -12,8 +12,6 @@ GELU_vulkan::GELU_vulkan() support_vulkan = true; pipeline_gelu = 0; - pipeline_gelu_pack4 = 0; - pipeline_gelu_pack8 = 0; } int GELU_vulkan::create_pipeline(const Option& opt) @@ -41,62 +39,14 @@ int GELU_vulkan::create_pipeline(const Option& opt) if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(0 + 5); - specializations[0 + 0].i = shape_packed.dims; - specializations[0 + 1].i = shape_packed.w; - specializations[0 + 2].i = shape_packed.h * shape_packed.d; - specializations[0 + 3].i = shape_packed.c; - specializations[0 + 4].i = shape_packed.cstep; + std::vector specializations(1); + specializations[0].u32 = shape_packed.total() * elempack / 4; - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - if (shape_packed.dims == 4) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); - local_size_xyz.c = std::min(4, shape_packed.c); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_gelu = new Pipeline(vkdev); - pipeline_gelu->set_optimal_local_size_xyz(local_size_xyz); - pipeline_gelu->create(LayerShaderType::gelu, opt, specializations); - } - - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_gelu_pack4 = new Pipeline(vkdev); - pipeline_gelu_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_gelu_pack4->create(LayerShaderType::gelu_pack4, opt, specializations); - } - - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_gelu_pack8 = new Pipeline(vkdev); - pipeline_gelu_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_gelu_pack8->create(LayerShaderType::gelu_pack8, opt, specializations); - } + pipeline_gelu = new Pipeline(vkdev); + pipeline_gelu->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_gelu->create(LayerShaderType::gelu, opt, specializations); return 0; } @@ -106,34 +56,24 @@ int GELU_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_gelu; pipeline_gelu = 0; - delete pipeline_gelu_pack4; - pipeline_gelu_pack4 = 0; - - delete pipeline_gelu_pack8; - pipeline_gelu_pack8 = 0; - return 0; } int GELU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h * bottom_top_blob.d; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_gelu_pack8 - : elempack == 4 ? pipeline_gelu_pack4 - : pipeline_gelu; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_gelu, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/gelu_vulkan.h b/src/layer/vulkan/gelu_vulkan.h index 47c16f6a6..1c3eb98d2 100644 --- a/src/layer/vulkan/gelu_vulkan.h +++ b/src/layer/vulkan/gelu_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_gelu; - Pipeline* pipeline_gelu_pack4; - Pipeline* pipeline_gelu_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/hardsigmoid_vulkan.cpp b/src/layer/vulkan/hardsigmoid_vulkan.cpp index 823a737c4..3617b2fae 100644 --- a/src/layer/vulkan/hardsigmoid_vulkan.cpp +++ b/src/layer/vulkan/hardsigmoid_vulkan.cpp @@ -12,8 +12,6 @@ HardSigmoid_vulkan::HardSigmoid_vulkan() support_vulkan = true; pipeline_hardsigmoid = 0; - pipeline_hardsigmoid_pack4 = 0; - pipeline_hardsigmoid_pack8 = 0; } int HardSigmoid_vulkan::create_pipeline(const Option& opt) @@ -40,58 +38,16 @@ int HardSigmoid_vulkan::create_pipeline(const Option& opt) if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack); if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(2 + 5); + std::vector specializations(2 + 1); specializations[0].f = alpha; specializations[1].f = beta; - specializations[2 + 0].i = shape_packed.dims; - specializations[2 + 1].i = shape_packed.w; - specializations[2 + 2].i = shape_packed.h; - specializations[2 + 3].i = shape_packed.c; - specializations[2 + 4].i = shape_packed.cstep; - - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_hardsigmoid = new Pipeline(vkdev); - pipeline_hardsigmoid->set_optimal_local_size_xyz(local_size_xyz); - pipeline_hardsigmoid->create(LayerShaderType::hardsigmoid, opt, specializations); - } + specializations[2 + 0].u32 = shape_packed.total() * elempack / 4; - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_hardsigmoid_pack4 = new Pipeline(vkdev); - pipeline_hardsigmoid_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_hardsigmoid_pack4->create(LayerShaderType::hardsigmoid_pack4, opt, specializations); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_hardsigmoid_pack8 = new Pipeline(vkdev); - pipeline_hardsigmoid_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_hardsigmoid_pack8->create(LayerShaderType::hardsigmoid_pack8, opt, specializations); - } + pipeline_hardsigmoid = new Pipeline(vkdev); + pipeline_hardsigmoid->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_hardsigmoid->create(LayerShaderType::hardsigmoid, opt, specializations); return 0; } @@ -101,34 +57,24 @@ int HardSigmoid_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_hardsigmoid; pipeline_hardsigmoid = 0; - delete pipeline_hardsigmoid_pack4; - pipeline_hardsigmoid_pack4 = 0; - - delete pipeline_hardsigmoid_pack8; - pipeline_hardsigmoid_pack8 = 0; - return 0; } int HardSigmoid_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_hardsigmoid_pack8 - : elempack == 4 ? pipeline_hardsigmoid_pack4 - : pipeline_hardsigmoid; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_hardsigmoid, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/hardsigmoid_vulkan.h b/src/layer/vulkan/hardsigmoid_vulkan.h index 200dd54cc..432460b89 100644 --- a/src/layer/vulkan/hardsigmoid_vulkan.h +++ b/src/layer/vulkan/hardsigmoid_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_hardsigmoid; - Pipeline* pipeline_hardsigmoid_pack4; - Pipeline* pipeline_hardsigmoid_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/hardswish_vulkan.cpp b/src/layer/vulkan/hardswish_vulkan.cpp index c3654044c..4e6234eca 100644 --- a/src/layer/vulkan/hardswish_vulkan.cpp +++ b/src/layer/vulkan/hardswish_vulkan.cpp @@ -12,8 +12,6 @@ HardSwish_vulkan::HardSwish_vulkan() support_vulkan = true; pipeline_hardswish = 0; - pipeline_hardswish_pack4 = 0; - pipeline_hardswish_pack8 = 0; } int HardSwish_vulkan::create_pipeline(const Option& opt) @@ -40,58 +38,16 @@ int HardSwish_vulkan::create_pipeline(const Option& opt) if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack); if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(2 + 5); + std::vector specializations(2 + 1); specializations[0].f = alpha; specializations[1].f = beta; - specializations[2 + 0].i = shape_packed.dims; - specializations[2 + 1].i = shape_packed.w; - specializations[2 + 2].i = shape_packed.h; - specializations[2 + 3].i = shape_packed.c; - specializations[2 + 4].i = shape_packed.cstep; - - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_hardswish = new Pipeline(vkdev); - pipeline_hardswish->set_optimal_local_size_xyz(local_size_xyz); - pipeline_hardswish->create(LayerShaderType::hardswish, opt, specializations); - } + specializations[2 + 0].u32 = shape_packed.total() * elempack / 4; - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_hardswish_pack4 = new Pipeline(vkdev); - pipeline_hardswish_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_hardswish_pack4->create(LayerShaderType::hardswish_pack4, opt, specializations); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_hardswish_pack8 = new Pipeline(vkdev); - pipeline_hardswish_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_hardswish_pack8->create(LayerShaderType::hardswish_pack8, opt, specializations); - } + pipeline_hardswish = new Pipeline(vkdev); + pipeline_hardswish->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_hardswish->create(LayerShaderType::hardswish, opt, specializations); return 0; } @@ -101,34 +57,24 @@ int HardSwish_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_hardswish; pipeline_hardswish = 0; - delete pipeline_hardswish_pack4; - pipeline_hardswish_pack4 = 0; - - delete pipeline_hardswish_pack8; - pipeline_hardswish_pack8 = 0; - return 0; } int HardSwish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_hardswish_pack8 - : elempack == 4 ? pipeline_hardswish_pack4 - : pipeline_hardswish; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_hardswish, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/hardswish_vulkan.h b/src/layer/vulkan/hardswish_vulkan.h index 325f3cfc3..b67e860e6 100644 --- a/src/layer/vulkan/hardswish_vulkan.h +++ b/src/layer/vulkan/hardswish_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_hardswish; - Pipeline* pipeline_hardswish_pack4; - Pipeline* pipeline_hardswish_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/mish_vulkan.cpp b/src/layer/vulkan/mish_vulkan.cpp index 0e0e8b81e..6aa268d09 100644 --- a/src/layer/vulkan/mish_vulkan.cpp +++ b/src/layer/vulkan/mish_vulkan.cpp @@ -12,8 +12,6 @@ Mish_vulkan::Mish_vulkan() support_vulkan = true; pipeline_mish = 0; - pipeline_mish_pack4 = 0; - pipeline_mish_pack8 = 0; } int Mish_vulkan::create_pipeline(const Option& opt) @@ -41,62 +39,14 @@ int Mish_vulkan::create_pipeline(const Option& opt) if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(0 + 5); - specializations[0 + 0].i = shape_packed.dims; - specializations[0 + 1].i = shape_packed.w; - specializations[0 + 2].i = shape_packed.h * shape_packed.d; - specializations[0 + 3].i = shape_packed.c; - specializations[0 + 4].i = shape_packed.cstep; + std::vector specializations(1); + specializations[0].u32 = shape_packed.total() * elempack / 4; - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - if (shape_packed.dims == 4) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); - local_size_xyz.c = std::min(4, shape_packed.c); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_mish = new Pipeline(vkdev); - pipeline_mish->set_optimal_local_size_xyz(local_size_xyz); - pipeline_mish->create(LayerShaderType::mish, opt, specializations); - } - - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_mish_pack4 = new Pipeline(vkdev); - pipeline_mish_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_mish_pack4->create(LayerShaderType::mish_pack4, opt, specializations); - } - - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_mish_pack8 = new Pipeline(vkdev); - pipeline_mish_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_mish_pack8->create(LayerShaderType::mish_pack8, opt, specializations); - } + pipeline_mish = new Pipeline(vkdev); + pipeline_mish->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_mish->create(LayerShaderType::mish, opt, specializations); return 0; } @@ -106,34 +56,24 @@ int Mish_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_mish; pipeline_mish = 0; - delete pipeline_mish_pack4; - pipeline_mish_pack4 = 0; - - delete pipeline_mish_pack8; - pipeline_mish_pack8 = 0; - return 0; } int Mish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h * bottom_top_blob.d; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_mish_pack8 - : elempack == 4 ? pipeline_mish_pack4 - : pipeline_mish; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_mish, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/mish_vulkan.h b/src/layer/vulkan/mish_vulkan.h index acb5b3346..57f90b811 100644 --- a/src/layer/vulkan/mish_vulkan.h +++ b/src/layer/vulkan/mish_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_mish; - Pipeline* pipeline_mish_pack4; - Pipeline* pipeline_mish_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/relu_vulkan.cpp b/src/layer/vulkan/relu_vulkan.cpp index 7763bb428..61ca1e528 100644 --- a/src/layer/vulkan/relu_vulkan.cpp +++ b/src/layer/vulkan/relu_vulkan.cpp @@ -12,8 +12,6 @@ ReLU_vulkan::ReLU_vulkan() support_vulkan = true; pipeline_relu = 0; - pipeline_relu_pack4 = 0; - pipeline_relu_pack8 = 0; } int ReLU_vulkan::create_pipeline(const Option& opt) @@ -41,63 +39,15 @@ int ReLU_vulkan::create_pipeline(const Option& opt) if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(1 + 5); + std::vector specializations(1 + 1); specializations[0].f = slope; - specializations[1 + 0].i = shape_packed.dims; - specializations[1 + 1].i = shape_packed.w; - specializations[1 + 2].i = shape_packed.h * shape_packed.d; - specializations[1 + 3].i = shape_packed.c; - specializations[1 + 4].i = shape_packed.cstep; - - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - if (shape_packed.dims == 4) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); - local_size_xyz.c = std::min(4, shape_packed.c); - } - - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_relu = new Pipeline(vkdev); - pipeline_relu->set_optimal_local_size_xyz(local_size_xyz); - pipeline_relu->create(LayerShaderType::relu, opt, specializations); - } + specializations[1 + 0].u32 = shape_packed.total() * elempack / 4; - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_relu_pack4 = new Pipeline(vkdev); - pipeline_relu_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_relu_pack4->create(LayerShaderType::relu_pack4, opt, specializations); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_relu_pack8 = new Pipeline(vkdev); - pipeline_relu_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_relu_pack8->create(LayerShaderType::relu_pack8, opt, specializations); - } + pipeline_relu = new Pipeline(vkdev); + pipeline_relu->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_relu->create(LayerShaderType::relu, opt, specializations); return 0; } @@ -107,34 +57,24 @@ int ReLU_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_relu; pipeline_relu = 0; - delete pipeline_relu_pack4; - pipeline_relu_pack4 = 0; - - delete pipeline_relu_pack8; - pipeline_relu_pack8 = 0; - return 0; } int ReLU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h * bottom_top_blob.d; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_relu_pack8 - : elempack == 4 ? pipeline_relu_pack4 - : pipeline_relu; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_relu, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/relu_vulkan.h b/src/layer/vulkan/relu_vulkan.h index c28e29a32..c84d975d9 100644 --- a/src/layer/vulkan/relu_vulkan.h +++ b/src/layer/vulkan/relu_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_relu; - Pipeline* pipeline_relu_pack4; - Pipeline* pipeline_relu_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/shader/celu.comp b/src/layer/vulkan/shader/celu.comp index 2a169b905..612a44a91 100644 --- a/src/layer/vulkan/shader/celu.comp +++ b/src/layer/vulkan/shader/celu.comp @@ -6,37 +6,25 @@ layout (constant_id = 0) const float alpha = 0; #define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + const uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - afp v = buffer_ld1(bottom_top_blob_data, gi); + v = max(v, afpvec4(0.0f)) + min(alpha * (exp(v / afpvec4(alpha)) - afpvec4(1.0f)), afpvec4(0.0f)); - v = max(v, afp(0.0f)) + min(alpha * (exp(v / alpha) - 1.0f), afp(0.0f)); - - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/celu_pack4.comp b/src/layer/vulkan/shader/celu_pack4.comp deleted file mode 100644 index d895ebaf1..000000000 --- a/src/layer/vulkan/shader/celu_pack4.comp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float alpha = 0; - -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - v = max(v, afp(0.0f)) + min(alpha * (exp(v / alpha) - 1.0f), afp(0.0f)); - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/celu_pack8.comp b/src/layer/vulkan/shader/celu_pack8.comp deleted file mode 100644 index 39c9c42a7..000000000 --- a/src/layer/vulkan/shader/celu_pack8.comp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float alpha = 0; - -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - v[0] = max(v[0], afp(0.0f)) + min(alpha * (exp(v[0] / alpha) - 1.0f), afp(0.0f)); - v[1] = max(v[1], afp(0.0f)) + min(alpha*(exp(v[1]/alpha)-1), afp(0.0f)); - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/clip.comp b/src/layer/vulkan/shader/clip.comp index 006d29a24..a768be169 100644 --- a/src/layer/vulkan/shader/clip.comp +++ b/src/layer/vulkan/shader/clip.comp @@ -7,37 +7,25 @@ layout (constant_id = 0) const float const_min = 0; layout (constant_id = 1) const float const_max = 0; #define shape_constant_id_offset 2 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + const uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; + if (gi >= psc(n)) + return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - afp v = buffer_ld1(bottom_top_blob_data, gi); + v = clamp(v, afpvec4(const_min), afpvec4(const_max)); - v = clamp(v, afp(const_min), afp(const_max)); - - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/clip_pack4.comp b/src/layer/vulkan/shader/clip_pack4.comp deleted file mode 100644 index baeaed4e6..000000000 --- a/src/layer/vulkan/shader/clip_pack4.comp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float const_min = 0; -layout (constant_id = 1) const float const_max = 0; - -#define shape_constant_id_offset 2 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - v = clamp(v, afp(const_min), afp(const_max)); - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/clip_pack8.comp b/src/layer/vulkan/shader/clip_pack8.comp deleted file mode 100644 index ce2fe876b..000000000 --- a/src/layer/vulkan/shader/clip_pack8.comp +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float const_min = 0; -layout (constant_id = 1) const float const_max = 0; - -#define shape_constant_id_offset 2 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - v[0] = clamp(v[0], afp(const_min), afp(const_max)); - v[1] = clamp(v[1], afp(const_min), afp(const_max)); - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/dropout.comp b/src/layer/vulkan/shader/dropout.comp index 1782e5664..497418129 100644 --- a/src/layer/vulkan/shader/dropout.comp +++ b/src/layer/vulkan/shader/dropout.comp @@ -1,4 +1,4 @@ -// Copyright 2018 Tencent +// Copyright 2019 Tencent // SPDX-License-Identifier: BSD-3-Clause #version 450 @@ -6,37 +6,26 @@ layout (constant_id = 0) const float scale = 1; #define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; + +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afp v = buffer_ld1(bottom_top_blob_data, gi); + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); v *= afp(scale); - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/dropout_pack4.comp b/src/layer/vulkan/shader/dropout_pack4.comp deleted file mode 100644 index bbf2cf052..000000000 --- a/src/layer/vulkan/shader/dropout_pack4.comp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float scale = 1; - -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - v *= afp(scale); - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/dropout_pack8.comp b/src/layer/vulkan/shader/dropout_pack8.comp deleted file mode 100644 index 8856f53f3..000000000 --- a/src/layer/vulkan/shader/dropout_pack8.comp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float scale = 1; - -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - v[0] = v[0] * afp(scale); - v[1] = v[1] * afp(scale); - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/elu.comp b/src/layer/vulkan/shader/elu.comp index ec5033e4b..830217f2b 100644 --- a/src/layer/vulkan/shader/elu.comp +++ b/src/layer/vulkan/shader/elu.comp @@ -6,37 +6,25 @@ layout (constant_id = 0) const float alpha = 0; #define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + const uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - afp v = buffer_ld1(bottom_top_blob_data, gi); + v = mix(afpvec4(alpha) * afpvec4(exp(v) - afpvec4(1.0f)), v, greaterThan(v, afpvec4(0.0f))); - v = v > afp(0.0f) ? v : afp(alpha * (exp(v) - 1.0f)); - - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/elu_pack4.comp b/src/layer/vulkan/shader/elu_pack4.comp deleted file mode 100644 index d44765ef7..000000000 --- a/src/layer/vulkan/shader/elu_pack4.comp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2022 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float alpha = 0; - -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - v = mix(afpvec4(alpha) * afpvec4(exp(v) - afpvec4(1.0f)), v, greaterThan(v, afpvec4(0.0f))); - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/elu_pack8.comp b/src/layer/vulkan/shader/elu_pack8.comp deleted file mode 100644 index 4f5e243e9..000000000 --- a/src/layer/vulkan/shader/elu_pack8.comp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2022 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float alpha = 0; - -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - v[0] = mix(afpvec4(alpha) * afpvec4(exp(v[0]) - afpvec4(1.0f)), v[0], greaterThan(v[0], afpvec4(0.0f))); - v[1] = mix(afpvec4(alpha) * afpvec4(exp(v[1]) - afpvec4(1.0f)), v[1], greaterThan(v[1], afpvec4(0.0f))); - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/erf.comp b/src/layer/vulkan/shader/erf.comp index 100834c28..395cf5fbe 100644 --- a/src/layer/vulkan/shader/erf.comp +++ b/src/layer/vulkan/shader/erf.comp @@ -3,53 +3,41 @@ #version 450 -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; -float erf(float x) +afpvec4 erf(afpvec4 x) { - float a1 = 0.254829592f; - float a2 = -0.284496736f; - float a3 = 1.421413741f; - float a4 = -1.453152027f; - float a5 = 1.061405429f; - float p = 0.3275911f; - float s = sign(x); - float x_abs = abs(x); - float t = 1.0f/(1.0f + p*x_abs); - float y = 1.0f - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x_abs*x_abs); + afpvec4 a1 = afpvec4(0.254829592f); + afpvec4 a2 = afpvec4(-0.284496736f); + afpvec4 a3 = afpvec4(1.421413741f); + afpvec4 a4 = afpvec4(-1.453152027f); + afpvec4 a5 = afpvec4(1.061405429f); + afpvec4 p = afpvec4(0.3275911f); + afpvec4 s = sign(x); + afpvec4 x_abs = abs(x); + afpvec4 t = 1.0f / (1.0f + p * x_abs); + afpvec4 y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs); return s * y; } void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + const uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - afp v = buffer_ld1(bottom_top_blob_data, gi); + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); v = erf(v); - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/erf_pack4.comp b/src/layer/vulkan/shader/erf_pack4.comp deleted file mode 100644 index 72497098e..000000000 --- a/src/layer/vulkan/shader/erf_pack4.comp +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -afpvec4 erf(afpvec4 x) -{ - afpvec4 a1 = afpvec4(0.254829592f); - afpvec4 a2 = afpvec4(-0.284496736f); - afpvec4 a3 = afpvec4(1.421413741f); - afpvec4 a4 = afpvec4(-1.453152027f); - afpvec4 a5 = afpvec4(1.061405429f); - afpvec4 p = afpvec4(0.3275911f); - afpvec4 s = sign(x); - afpvec4 x_abs = abs(x); - afpvec4 t = 1.0f / (1.0f + p * x_abs); - afpvec4 y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs); - return s * y; -} - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - v = erf(v); - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/erf_pack8.comp b/src/layer/vulkan/shader/erf_pack8.comp deleted file mode 100644 index f6bf7bf13..000000000 --- a/src/layer/vulkan/shader/erf_pack8.comp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -afpvec4 erf(afpvec4 x) -{ - afpvec4 a1 = afpvec4(0.254829592f); - afpvec4 a2 = afpvec4(-0.284496736f); - afpvec4 a3 = afpvec4(1.421413741f); - afpvec4 a4 = afpvec4(-1.453152027f); - afpvec4 a5 = afpvec4(1.061405429f); - afpvec4 p = afpvec4(0.3275911f); - afpvec4 s = sign(x); - afpvec4 x_abs = abs(x); - afpvec4 t = 1.0f / (1.0f + p * x_abs); - afpvec4 y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs); - return s * y; -} - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - v[0] = erf(v[0]); - v[1] = erf(v[1]); - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/gelu.comp b/src/layer/vulkan/shader/gelu.comp index 165a06246..f329d1dc7 100644 --- a/src/layer/vulkan/shader/gelu.comp +++ b/src/layer/vulkan/shader/gelu.comp @@ -4,43 +4,32 @@ #version 450 #define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - afp v = buffer_ld1(bottom_top_blob_data, gi); + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); // y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715x^3))) #if NCNN_moltenvk - v = afp(0.5f) * v * (afp(1.0f) + afp(tanh(float(afp(0.79788452f) * (v + afp(0.044715f) * v * v * v))))); + v = afpvec4(0.5f) * v * (afpvec4(1.0f) + afpvec4(tanh(vec4(afpvec4(0.79788452f) * (v + afpvec4(0.044715f) * v * v * v))))); #else - v = afp(0.5f) * v * (afp(1.0f) + tanh(afp(0.79788452f) * (v + afp(0.044715f) * v * v * v))); + v = afpvec4(0.5f) * v * (afpvec4(1.0f) + tanh(afpvec4(0.79788452f) * (v + afpvec4(0.044715f) * v * v * v))); #endif - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/gelu_pack4.comp b/src/layer/vulkan/shader/gelu_pack4.comp deleted file mode 100644 index 79da60527..000000000 --- a/src/layer/vulkan/shader/gelu_pack4.comp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2023 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - // y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715x^3))) - -#if NCNN_moltenvk - v = afpvec4(0.5f) * v * (afpvec4(1.0f) + afpvec4(tanh(vec4(afpvec4(0.79788452f) * (v + afpvec4(0.044715f) * v * v * v))))); -#else - v = afpvec4(0.5f) * v * (afpvec4(1.0f) + tanh(afpvec4(0.79788452f) * (v + afpvec4(0.044715f) * v * v * v))); -#endif - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/gelu_pack8.comp b/src/layer/vulkan/shader/gelu_pack8.comp deleted file mode 100644 index 6ab6286e9..000000000 --- a/src/layer/vulkan/shader/gelu_pack8.comp +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2023 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - // y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715x^3))) - -#if NCNN_moltenvk - v[0] = afpvec4(0.5f) * v[0] * (afpvec4(1.0f) + afpvec4(tanh(vec4(afpvec4(0.79788452f) * (v[0] + afpvec4(0.044715f) * v[0] * v[0] * v[0]))))); - v[1] = afpvec4(0.5f) * v[1] * (afpvec4(1.0f) + afpvec4(tanh(vec4(afpvec4(0.79788452f) * (v[1] + afpvec4(0.044715f) * v[1] * v[1] * v[1]))))); -#else - v[0] = afpvec4(0.5f) * v[0] * (afpvec4(1.0f) + tanh(afpvec4(0.79788452f) * (v[0] + afpvec4(0.044715f) * v[0] * v[0] * v[0]))); - v[1] = afpvec4(0.5f) * v[1] * (afpvec4(1.0f) + tanh(afpvec4(0.79788452f) * (v[1] + afpvec4(0.044715f) * v[1] * v[1] * v[1]))); -#endif - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/hardsigmoid.comp b/src/layer/vulkan/shader/hardsigmoid.comp index 220f0e2f7..f5f2a9012 100644 --- a/src/layer/vulkan/shader/hardsigmoid.comp +++ b/src/layer/vulkan/shader/hardsigmoid.comp @@ -1,4 +1,4 @@ -// Copyright 2018 Tencent +// Copyright 2019 Tencent // SPDX-License-Identifier: BSD-3-Clause #version 450 @@ -7,37 +7,26 @@ layout (constant_id = 0) const float alpha = 0; layout (constant_id = 1) const float beta = 0; #define shape_constant_id_offset 2 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; + +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afp v = buffer_ld1(bottom_top_blob_data, gi); + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); v = clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/hardsigmoid_pack4.comp b/src/layer/vulkan/shader/hardsigmoid_pack4.comp deleted file mode 100644 index 0a18a31f9..000000000 --- a/src/layer/vulkan/shader/hardsigmoid_pack4.comp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float alpha = 0; -layout (constant_id = 1) const float beta = 0; - -#define shape_constant_id_offset 2 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - v = clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/hardsigmoid_pack8.comp b/src/layer/vulkan/shader/hardsigmoid_pack8.comp deleted file mode 100644 index bda0f1ecd..000000000 --- a/src/layer/vulkan/shader/hardsigmoid_pack8.comp +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float alpha = 0; -layout (constant_id = 1) const float beta = 0; - -#define shape_constant_id_offset 2 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - v[0] = clamp(v[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v[1] = clamp(v[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/hardswish.comp b/src/layer/vulkan/shader/hardswish.comp index e5d16582b..45c47437b 100644 --- a/src/layer/vulkan/shader/hardswish.comp +++ b/src/layer/vulkan/shader/hardswish.comp @@ -1,4 +1,4 @@ -// Copyright 2018 Tencent +// Copyright 2019 Tencent // SPDX-License-Identifier: BSD-3-Clause #version 450 @@ -7,37 +7,25 @@ layout (constant_id = 0) const float alpha = 0; layout (constant_id = 1) const float beta = 0; #define shape_constant_id_offset 2 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afp v = buffer_ld1(bottom_top_blob_data, gi); + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); v = v * clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/hardswish_pack4.comp b/src/layer/vulkan/shader/hardswish_pack4.comp deleted file mode 100644 index ff2ff7b92..000000000 --- a/src/layer/vulkan/shader/hardswish_pack4.comp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float alpha = 0; -layout (constant_id = 1) const float beta = 0; - -#define shape_constant_id_offset 2 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - v = v * clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/hardswish_pack8.comp b/src/layer/vulkan/shader/hardswish_pack8.comp deleted file mode 100644 index a1e529bfc..000000000 --- a/src/layer/vulkan/shader/hardswish_pack8.comp +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float alpha = 0; -layout (constant_id = 1) const float beta = 0; - -#define shape_constant_id_offset 2 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - v[0] = v[0] * clamp(v[0] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - v[1] = v[1] * clamp(v[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/mish.comp b/src/layer/vulkan/shader/mish.comp index be1b4fff9..60d479799 100644 --- a/src/layer/vulkan/shader/mish.comp +++ b/src/layer/vulkan/shader/mish.comp @@ -1,44 +1,32 @@ -// Copyright 2020 Tencent +// Copyright 2019 Tencent // SPDX-License-Identifier: BSD-3-Clause #version 450 -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; + +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afp v = buffer_ld1(bottom_top_blob_data, gi); + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); #if NCNN_moltenvk - v = v * afp(tanh(float(log(exp(v) + afp(1.f))))); + v = v * afpvec4(tanh(vec4(log(exp(v) + afpvec4(1.f))))); #else - v = v * tanh(log(exp(v) + afp(1.f))); + v = v * tanh(log(exp(v) + afpvec4(1.f))); #endif - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/mish_pack4.comp b/src/layer/vulkan/shader/mish_pack4.comp deleted file mode 100644 index 7123394a4..000000000 --- a/src/layer/vulkan/shader/mish_pack4.comp +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - -#if NCNN_moltenvk - v = v * afpvec4(tanh(vec4(log(exp(v) + afpvec4(1.f))))); -#else - v = v * tanh(log(exp(v) + afpvec4(1.f))); -#endif - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/mish_pack8.comp b/src/layer/vulkan/shader/mish_pack8.comp deleted file mode 100644 index 0602fbd0b..000000000 --- a/src/layer/vulkan/shader/mish_pack8.comp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - -#if NCNN_moltenvk - v[0] = v[0] * afpvec4(tanh(vec4(log(exp(v[0]) + afpvec4(1.f))))); - v[1] = v[1] * afpvec4(tanh(vec4(log(exp(v[1]) + afpvec4(1.f))))); -#else - v[0] = v[0] * tanh(log(exp(v[0]) + afpvec4(1.f))); - v[1] = v[1] * tanh(log(exp(v[1]) + afpvec4(1.f))); -#endif - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/relu.comp b/src/layer/vulkan/shader/relu.comp index b92759021..359728eda 100644 --- a/src/layer/vulkan/shader/relu.comp +++ b/src/layer/vulkan/shader/relu.comp @@ -1,4 +1,4 @@ -// Copyright 2018 Tencent +// Copyright 2019 Tencent // SPDX-License-Identifier: BSD-3-Clause #version 450 @@ -6,40 +6,29 @@ layout (constant_id = 0) const float slope = 0; #define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; + +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afp v = buffer_ld1(bottom_top_blob_data, gi); + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); if (slope == 0) v = max(v, afp(0.f)); else - v = v < afp(0.f) ? v * afp(slope) : v; + v = mix(v, v * afp(slope), lessThan(v, afpvec4(0.f))); - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/relu_pack4.comp b/src/layer/vulkan/shader/relu_pack4.comp deleted file mode 100644 index 493949fa9..000000000 --- a/src/layer/vulkan/shader/relu_pack4.comp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float slope = 0; - -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - if (slope == 0) - v = max(v, afp(0.f)); - else - v = mix(v, v * afp(slope), lessThan(v, afpvec4(0.f))); - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/relu_pack8.comp b/src/layer/vulkan/shader/relu_pack8.comp deleted file mode 100644 index 4df1640f0..000000000 --- a/src/layer/vulkan/shader/relu_pack8.comp +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -layout (constant_id = 0) const float slope = 0; - -#define shape_constant_id_offset 1 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - if (slope == 0) - { - v[0] = max(v[0], afp(0.f)); - v[1] = max(v[1], afp(0.f)); - } - else - { - v[0] = mix(v[0], v[0] * afp(slope), lessThan(v[0], afpvec4(0.f))); - v[1] = mix(v[1], v[1] * afp(slope), lessThan(v[1], afpvec4(0.f))); - } - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/swish.comp b/src/layer/vulkan/shader/swish.comp index a66127005..96e68a3a0 100644 --- a/src/layer/vulkan/shader/swish.comp +++ b/src/layer/vulkan/shader/swish.comp @@ -1,40 +1,27 @@ -// Copyright 2020 Tencent +// Copyright 2019 Tencent // SPDX-License-Identifier: BSD-3-Clause #version 450 -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - afp v = buffer_ld1(bottom_top_blob_data, gi); + v = v / (afpvec4(1.f) + exp(-v)); - v = v / (afp(1.f) + exp(-v)); - - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/swish_pack4.comp b/src/layer/vulkan/shader/swish_pack4.comp deleted file mode 100644 index 166792d13..000000000 --- a/src/layer/vulkan/shader/swish_pack4.comp +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - - v = v / (afpvec4(1.f) + exp(-v)); - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/swish_pack8.comp b/src/layer/vulkan/shader/swish_pack8.comp deleted file mode 100644 index 49278f198..000000000 --- a/src/layer/vulkan/shader/swish_pack8.comp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - - v[0] = v[0] / (afpvec4(1.f) + exp(-v[0])); - v[1] = v[1] / (afpvec4(1.f) + exp(-v[1])); - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/tanh.comp b/src/layer/vulkan/shader/tanh.comp index 6e0c8ef0e..1d20169d6 100644 --- a/src/layer/vulkan/shader/tanh.comp +++ b/src/layer/vulkan/shader/tanh.comp @@ -3,42 +3,30 @@ #version 450 -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; +layout (constant_id = 0) const uint n = 0; -layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; + +layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (push_constant) uniform parameter { - int dims; - int w; - int h; - int c; - int cstep; + uint n; } p; void main() { - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); + uint gi = gl_GlobalInvocationID.x; - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) + if (gi >= psc(n)) return; - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afp v = buffer_ld1(bottom_top_blob_data, gi); + afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); #if NCNN_moltenvk - v = afp(tanh(float(v))); + v = afpvec4(tanh(vec4(v))); #else v = tanh(v); #endif - buffer_st1(bottom_top_blob_data, gi, v); + buffer_st4(bottom_top_blob_data, gi, v); } diff --git a/src/layer/vulkan/shader/tanh_pack4.comp b/src/layer/vulkan/shader/tanh_pack4.comp deleted file mode 100644 index 33c374050..000000000 --- a/src/layer/vulkan/shader/tanh_pack4.comp +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2019 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); - -#if NCNN_moltenvk - v = afpvec4(tanh(vec4(v))); -#else - v = tanh(v); -#endif - - buffer_st4(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/shader/tanh_pack8.comp b/src/layer/vulkan/shader/tanh_pack8.comp deleted file mode 100644 index acf0c1c50..000000000 --- a/src/layer/vulkan/shader/tanh_pack8.comp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020 Tencent -// SPDX-License-Identifier: BSD-3-Clause - -#version 450 - -#define shape_constant_id_offset 0 -layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; -layout (constant_id = shape_constant_id_offset + 1) const int w = 0; -layout (constant_id = shape_constant_id_offset + 2) const int h = 0; -layout (constant_id = shape_constant_id_offset + 3) const int c = 0; -layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; - -layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; - -layout (push_constant) uniform parameter -{ - int dims; - int w; - int h; - int c; - int cstep; -} p; - -void main() -{ - int gx = int(gl_GlobalInvocationID.x); - int gy = int(gl_GlobalInvocationID.y); - int gz = int(gl_GlobalInvocationID.z); - - if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) - return; - - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - - afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); - -#if NCNN_moltenvk - v[0] = afpvec4(tanh(vec4(v[0]))); - v[1] = afpvec4(tanh(vec4(v[1]))); -#else - v[0] = tanh(v[0]); - v[1] = tanh(v[1]); -#endif - - buffer_st8(bottom_top_blob_data, gi, v); -} diff --git a/src/layer/vulkan/swish_vulkan.cpp b/src/layer/vulkan/swish_vulkan.cpp index eb857f34e..b2d10fd38 100644 --- a/src/layer/vulkan/swish_vulkan.cpp +++ b/src/layer/vulkan/swish_vulkan.cpp @@ -12,8 +12,6 @@ Swish_vulkan::Swish_vulkan() support_vulkan = true; pipeline_swish = 0; - pipeline_swish_pack4 = 0; - pipeline_swish_pack8 = 0; } int Swish_vulkan::create_pipeline(const Option& opt) @@ -41,62 +39,14 @@ int Swish_vulkan::create_pipeline(const Option& opt) if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(0 + 5); - specializations[0 + 0].i = shape_packed.dims; - specializations[0 + 1].i = shape_packed.w; - specializations[0 + 2].i = shape_packed.h * shape_packed.d; - specializations[0 + 3].i = shape_packed.c; - specializations[0 + 4].i = shape_packed.cstep; + std::vector specializations(1); + specializations[0 + 0].u32 = shape_packed.total() * elempack / 4; - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - if (shape_packed.dims == 4) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); - local_size_xyz.c = std::min(4, shape_packed.c); - } - - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_swish = new Pipeline(vkdev); - pipeline_swish->set_optimal_local_size_xyz(local_size_xyz); - pipeline_swish->create(LayerShaderType::swish, opt, specializations); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_swish_pack4 = new Pipeline(vkdev); - pipeline_swish_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_swish_pack4->create(LayerShaderType::swish_pack4, opt, specializations); - } - - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_swish_pack8 = new Pipeline(vkdev); - pipeline_swish_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_swish_pack8->create(LayerShaderType::swish_pack8, opt, specializations); - } + pipeline_swish = new Pipeline(vkdev); + pipeline_swish->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_swish->create(LayerShaderType::swish, opt, specializations); return 0; } @@ -106,36 +56,25 @@ int Swish_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_swish; pipeline_swish = 0; - delete pipeline_swish_pack4; - pipeline_swish_pack4 = 0; - - delete pipeline_swish_pack8; - pipeline_swish_pack8 = 0; - return 0; } int Swish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h * bottom_top_blob.d; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; + std::vector constants(1); + constants[0].u32 = n; - const Pipeline* pipeline = elempack == 8 ? pipeline_swish_pack8 - : elempack == 4 ? pipeline_swish_pack4 - : pipeline_swish; - - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_swish, bindings, constants, dispatcher); return 0; } - } // namespace ncnn diff --git a/src/layer/vulkan/swish_vulkan.h b/src/layer/vulkan/swish_vulkan.h index d82baff06..4d2be66d1 100644 --- a/src/layer/vulkan/swish_vulkan.h +++ b/src/layer/vulkan/swish_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_swish; - Pipeline* pipeline_swish_pack4; - Pipeline* pipeline_swish_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/tanh_vulkan.cpp b/src/layer/vulkan/tanh_vulkan.cpp index b30ac1f07..70553d27c 100644 --- a/src/layer/vulkan/tanh_vulkan.cpp +++ b/src/layer/vulkan/tanh_vulkan.cpp @@ -12,8 +12,6 @@ TanH_vulkan::TanH_vulkan() support_vulkan = true; pipeline_tanh = 0; - pipeline_tanh_pack4 = 0; - pipeline_tanh_pack8 = 0; } int TanH_vulkan::create_pipeline(const Option& opt) @@ -41,62 +39,14 @@ int TanH_vulkan::create_pipeline(const Option& opt) if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); - std::vector specializations(0 + 5); - specializations[0 + 0].i = shape_packed.dims; - specializations[0 + 1].i = shape_packed.w; - specializations[0 + 2].i = shape_packed.h * shape_packed.d; - specializations[0 + 3].i = shape_packed.c; - specializations[0 + 4].i = shape_packed.cstep; + std::vector specializations(1); + specializations[0 + 0].u32 = shape_packed.total() * elempack / 4; - Mat local_size_xyz; - if (shape_packed.dims == 1) - { - local_size_xyz.w = std::min(64, shape_packed.w); - local_size_xyz.h = 1; - local_size_xyz.c = 1; - } - if (shape_packed.dims == 2) - { - local_size_xyz.w = std::min(8, shape_packed.w); - local_size_xyz.h = std::min(8, shape_packed.h); - local_size_xyz.c = 1; - } - if (shape_packed.dims == 3) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h); - local_size_xyz.c = std::min(4, shape_packed.c); - } - if (shape_packed.dims == 4) - { - local_size_xyz.w = std::min(4, shape_packed.w); - local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); - local_size_xyz.c = std::min(4, shape_packed.c); - } + const int local_size_x = vkdev->info.subgroup_size(); - // pack1 - if (shape.dims == 0 || elempack == 1) - { - pipeline_tanh = new Pipeline(vkdev); - pipeline_tanh->set_optimal_local_size_xyz(local_size_xyz); - pipeline_tanh->create(LayerShaderType::tanh, opt, specializations); - } - - // pack4 - if (shape.dims == 0 || elempack == 4) - { - pipeline_tanh_pack4 = new Pipeline(vkdev); - pipeline_tanh_pack4->set_optimal_local_size_xyz(local_size_xyz); - pipeline_tanh_pack4->create(LayerShaderType::tanh_pack4, opt, specializations); - } - - // pack8 - if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) - { - pipeline_tanh_pack8 = new Pipeline(vkdev); - pipeline_tanh_pack8->set_optimal_local_size_xyz(local_size_xyz); - pipeline_tanh_pack8->create(LayerShaderType::tanh_pack8, opt, specializations); - } + pipeline_tanh = new Pipeline(vkdev); + pipeline_tanh->set_optimal_local_size_xyz(local_size_x, 1, 1); + pipeline_tanh->create(LayerShaderType::tanh, opt, specializations); return 0; } @@ -106,34 +56,24 @@ int TanH_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_tanh; pipeline_tanh = 0; - delete pipeline_tanh_pack4; - pipeline_tanh_pack4 = 0; - - delete pipeline_tanh_pack8; - pipeline_tanh_pack8 = 0; - return 0; } int TanH_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const { - int elempack = bottom_top_blob.elempack; + const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; std::vector bindings(1); bindings[0] = bottom_top_blob; - std::vector constants(5); - constants[0].i = bottom_top_blob.dims; - constants[1].i = bottom_top_blob.w; - constants[2].i = bottom_top_blob.h * bottom_top_blob.d; - constants[3].i = bottom_top_blob.c; - constants[4].i = bottom_top_blob.cstep; - - const Pipeline* pipeline = elempack == 8 ? pipeline_tanh_pack8 - : elempack == 4 ? pipeline_tanh_pack4 - : pipeline_tanh; + std::vector constants(1); + constants[0].u32 = n; - cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); + VkMat dispatcher; + dispatcher.w = n; + dispatcher.h = 1; + dispatcher.c = 1; + cmd.record_pipeline(pipeline_tanh, bindings, constants, dispatcher); return 0; } diff --git a/src/layer/vulkan/tanh_vulkan.h b/src/layer/vulkan/tanh_vulkan.h index 28709b574..751630d00 100644 --- a/src/layer/vulkan/tanh_vulkan.h +++ b/src/layer/vulkan/tanh_vulkan.h @@ -21,8 +21,6 @@ public: public: Pipeline* pipeline_tanh; - Pipeline* pipeline_tanh_pack4; - Pipeline* pipeline_tanh_pack8; }; } // namespace ncnn