From a9d2cb8d6b4d70bed1b2d29f22c8248ee31f4ca1 Mon Sep 17 00:00:00 2001 From: nihui Date: Tue, 28 Jan 2020 18:52:32 +0800 Subject: [PATCH] enable pack8 shaders for layers without shape changes --- src/layer/vulkan/absval_vulkan.cpp | 15 ++++- src/layer/vulkan/absval_vulkan.h | 1 + src/layer/vulkan/cast_vulkan.cpp | 30 ++++++++- src/layer/vulkan/cast_vulkan.h | 2 + src/layer/vulkan/clip_vulkan.cpp | 15 ++++- src/layer/vulkan/clip_vulkan.h | 1 + src/layer/vulkan/dropout_vulkan.cpp | 15 ++++- src/layer/vulkan/dropout_vulkan.h | 1 + src/layer/vulkan/eltwise_vulkan.cpp | 25 ++++++- src/layer/vulkan/eltwise_vulkan.h | 1 + src/layer/vulkan/hardsigmoid_vulkan.cpp | 15 ++++- src/layer/vulkan/hardsigmoid_vulkan.h | 1 + src/layer/vulkan/hardswish_vulkan.cpp | 15 ++++- src/layer/vulkan/hardswish_vulkan.h | 1 + src/layer/vulkan/lrn_vulkan.cpp | 58 +++++++++++++++- src/layer/vulkan/lrn_vulkan.h | 4 ++ src/layer/vulkan/normalize_vulkan.cpp | 68 ++++++++++++++----- src/layer/vulkan/normalize_vulkan.h | 6 +- src/layer/vulkan/padding_vulkan.cpp | 37 +++++----- src/layer/vulkan/padding_vulkan.h | 3 +- src/layer/vulkan/pooling_vulkan.cpp | 90 ++++++++++++++++--------- src/layer/vulkan/pooling_vulkan.h | 4 +- src/layer/vulkan/relu_vulkan.cpp | 15 ++++- src/layer/vulkan/relu_vulkan.h | 1 + src/layer/vulkan/sigmoid_vulkan.cpp | 15 ++++- src/layer/vulkan/sigmoid_vulkan.h | 1 + src/layer/vulkan/softmax_vulkan.cpp | 51 ++++++++++++-- src/layer/vulkan/softmax_vulkan.h | 5 ++ src/layer/vulkan/tanh_vulkan.cpp | 15 ++++- src/layer/vulkan/tanh_vulkan.h | 1 + src/layer/vulkan/unaryop_vulkan.cpp | 15 ++++- src/layer/vulkan/unaryop_vulkan.h | 1 + 32 files changed, 442 insertions(+), 86 deletions(-) diff --git a/src/layer/vulkan/absval_vulkan.cpp b/src/layer/vulkan/absval_vulkan.cpp index 6eef021ea..6dc04b2ff 100644 --- a/src/layer/vulkan/absval_vulkan.cpp +++ b/src/layer/vulkan/absval_vulkan.cpp @@ -24,6 +24,7 @@ AbsVal_vulkan::AbsVal_vulkan() pipeline_absval = 0; pipeline_absval_pack4 = 0; + pipeline_absval_pack8 = 0; } int AbsVal_vulkan::create_pipeline(const Option& opt) @@ -44,6 +45,13 @@ int AbsVal_vulkan::create_pipeline(const Option& opt) pipeline_absval_pack4->create("absval_pack4", opt, specializations, 1, 5); } + // pack8 + { + pipeline_absval_pack8 = new Pipeline(vkdev); + pipeline_absval_pack8->set_optimal_local_size_xyz(); + pipeline_absval_pack8->create("absval_pack8", opt, specializations, 1, 5); + } + return 0; } @@ -55,6 +63,9 @@ int AbsVal_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_absval_pack4; pipeline_absval_pack4 = 0; + delete pipeline_absval_pack8; + pipeline_absval_pack8 = 0; + return 0; } @@ -72,7 +83,9 @@ int AbsVal_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const constants[3].i = bottom_top_blob.c; constants[4].i = bottom_top_blob.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_absval_pack4 : pipeline_absval; + const Pipeline* pipeline = elempack == 8 ? pipeline_absval_pack8 + : elempack == 4 ? pipeline_absval_pack4 + : pipeline_absval; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); diff --git a/src/layer/vulkan/absval_vulkan.h b/src/layer/vulkan/absval_vulkan.h index e13c4919a..173f838ba 100644 --- a/src/layer/vulkan/absval_vulkan.h +++ b/src/layer/vulkan/absval_vulkan.h @@ -33,6 +33,7 @@ public: public: Pipeline* pipeline_absval; Pipeline* pipeline_absval_pack4; + Pipeline* pipeline_absval_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/cast_vulkan.cpp b/src/layer/vulkan/cast_vulkan.cpp index faa846ec2..540226449 100644 --- a/src/layer/vulkan/cast_vulkan.cpp +++ b/src/layer/vulkan/cast_vulkan.cpp @@ -24,8 +24,10 @@ Cast_vulkan::Cast_vulkan() pipeline_cast_fp32_to_fp16 = 0; pipeline_cast_fp32_to_fp16_pack4 = 0; + pipeline_cast_fp32_to_fp16_pack8 = 0; pipeline_cast_fp16_to_fp32 = 0; pipeline_cast_fp16_to_fp32_pack4 = 0; + pipeline_cast_fp16_to_fp32_pack8 = 0; } int Cast_vulkan::create_pipeline(const Option& opt) @@ -47,6 +49,13 @@ int Cast_vulkan::create_pipeline(const Option& opt) pipeline_cast_fp32_to_fp16_pack4->set_optimal_local_size_xyz(); pipeline_cast_fp32_to_fp16_pack4->create("cast_fp32_to_fp16_pack4", opt, specializations, 2, 10); } + + // pack8 + { + pipeline_cast_fp32_to_fp16_pack8 = new Pipeline(vkdev); + pipeline_cast_fp32_to_fp16_pack8->set_optimal_local_size_xyz(); + pipeline_cast_fp32_to_fp16_pack8->create("cast_fp32_to_fp16_pack8", opt, specializations, 2, 10); + } } if (type_from == 2 && type_to == 1) @@ -64,6 +73,13 @@ int Cast_vulkan::create_pipeline(const Option& opt) pipeline_cast_fp16_to_fp32_pack4->set_optimal_local_size_xyz(); pipeline_cast_fp16_to_fp32_pack4->create("cast_fp16_to_fp32_pack4", opt, specializations, 2, 10); } + + // pack8 + { + pipeline_cast_fp16_to_fp32_pack8 = new Pipeline(vkdev); + pipeline_cast_fp16_to_fp32_pack8->set_optimal_local_size_xyz(); + pipeline_cast_fp16_to_fp32_pack8->create("cast_fp16_to_fp32_pack8", opt, specializations, 2, 10); + } } return 0; @@ -77,12 +93,18 @@ int Cast_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_cast_fp32_to_fp16_pack4; pipeline_cast_fp32_to_fp16_pack4 = 0; + delete pipeline_cast_fp32_to_fp16_pack8; + pipeline_cast_fp32_to_fp16_pack8 = 0; + delete pipeline_cast_fp16_to_fp32; pipeline_cast_fp16_to_fp32 = 0; delete pipeline_cast_fp16_to_fp32_pack4; pipeline_cast_fp16_to_fp32_pack4 = 0; + delete pipeline_cast_fp16_to_fp32_pack8; + pipeline_cast_fp16_to_fp32_pack8 = 0; + return 0; } @@ -159,11 +181,15 @@ int Cast_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& c if (type_from == 1 && type_to == 2) { - pipeline = elempack == 4 ? pipeline_cast_fp32_to_fp16_pack4 : pipeline_cast_fp32_to_fp16; + pipeline = elempack == 8 ? pipeline_cast_fp32_to_fp16_pack8 + : elempack == 4 ? pipeline_cast_fp32_to_fp16_pack4 + : pipeline_cast_fp32_to_fp16; } if (type_from == 2 && type_to == 1) { - pipeline = elempack == 4 ? pipeline_cast_fp16_to_fp32_pack4 : pipeline_cast_fp16_to_fp32; + pipeline = elempack == 8 ? pipeline_cast_fp16_to_fp32_pack8 + : elempack == 4 ? pipeline_cast_fp16_to_fp32_pack4 + : pipeline_cast_fp16_to_fp32; } // TODO more cast type diff --git a/src/layer/vulkan/cast_vulkan.h b/src/layer/vulkan/cast_vulkan.h index e57281fd2..5d951fa71 100644 --- a/src/layer/vulkan/cast_vulkan.h +++ b/src/layer/vulkan/cast_vulkan.h @@ -33,8 +33,10 @@ public: public: Pipeline* pipeline_cast_fp32_to_fp16; Pipeline* pipeline_cast_fp32_to_fp16_pack4; + Pipeline* pipeline_cast_fp32_to_fp16_pack8; Pipeline* pipeline_cast_fp16_to_fp32; Pipeline* pipeline_cast_fp16_to_fp32_pack4; + Pipeline* pipeline_cast_fp16_to_fp32_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/clip_vulkan.cpp b/src/layer/vulkan/clip_vulkan.cpp index 964e011aa..ade7d657c 100644 --- a/src/layer/vulkan/clip_vulkan.cpp +++ b/src/layer/vulkan/clip_vulkan.cpp @@ -24,6 +24,7 @@ Clip_vulkan::Clip_vulkan() pipeline_clip = 0; pipeline_clip_pack4 = 0; + pipeline_clip_pack8 = 0; } int Clip_vulkan::create_pipeline(const Option& opt) @@ -46,6 +47,13 @@ int Clip_vulkan::create_pipeline(const Option& opt) pipeline_clip_pack4->create("clip_pack4", opt, specializations, 1, 5); } + // pack8 + { + pipeline_clip_pack8 = new Pipeline(vkdev); + pipeline_clip_pack8->set_optimal_local_size_xyz(); + pipeline_clip_pack8->create("clip_pack8", opt, specializations, 1, 5); + } + return 0; } @@ -57,6 +65,9 @@ int Clip_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_clip_pack4; pipeline_clip_pack4 = 0; + delete pipeline_clip_pack8; + pipeline_clip_pack8 = 0; + return 0; } @@ -74,7 +85,9 @@ int Clip_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const O constants[3].i = bottom_top_blob.c; constants[4].i = bottom_top_blob.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_clip_pack4 : pipeline_clip; + const Pipeline* pipeline = elempack == 8 ? pipeline_clip_pack8 + : elempack == 4 ? pipeline_clip_pack4 + : pipeline_clip; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); diff --git a/src/layer/vulkan/clip_vulkan.h b/src/layer/vulkan/clip_vulkan.h index f05c32815..27ef87031 100644 --- a/src/layer/vulkan/clip_vulkan.h +++ b/src/layer/vulkan/clip_vulkan.h @@ -33,6 +33,7 @@ public: public: Pipeline* pipeline_clip; Pipeline* pipeline_clip_pack4; + Pipeline* pipeline_clip_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/dropout_vulkan.cpp b/src/layer/vulkan/dropout_vulkan.cpp index a2b573893..61be4acad 100644 --- a/src/layer/vulkan/dropout_vulkan.cpp +++ b/src/layer/vulkan/dropout_vulkan.cpp @@ -25,6 +25,7 @@ Dropout_vulkan::Dropout_vulkan() pipeline_dropout = 0; pipeline_dropout_pack4 = 0; + pipeline_dropout_pack8 = 0; } int Dropout_vulkan::create_pipeline(const Option& opt) @@ -46,6 +47,13 @@ int Dropout_vulkan::create_pipeline(const Option& opt) pipeline_dropout_pack4->create("dropout_pack4", opt, specializations, 1, 5); } + // pack8 + { + pipeline_dropout_pack8 = new Pipeline(vkdev); + pipeline_dropout_pack8->set_optimal_local_size_xyz(); + pipeline_dropout_pack8->create("dropout_pack8", opt, specializations, 1, 5); + } + return 0; } @@ -57,6 +65,9 @@ int Dropout_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_dropout_pack4; pipeline_dropout_pack4 = 0; + delete pipeline_dropout_pack8; + pipeline_dropout_pack8 = 0; + return 0; } @@ -79,7 +90,9 @@ int Dropout_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons constants[3].i = bottom_top_blob.c; constants[4].i = bottom_top_blob.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_dropout_pack4 : pipeline_dropout; + const Pipeline* pipeline = elempack == 8 ? pipeline_dropout_pack8 + : elempack == 4 ? pipeline_dropout_pack4 + : pipeline_dropout; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); diff --git a/src/layer/vulkan/dropout_vulkan.h b/src/layer/vulkan/dropout_vulkan.h index 064486edd..5a27bdc33 100644 --- a/src/layer/vulkan/dropout_vulkan.h +++ b/src/layer/vulkan/dropout_vulkan.h @@ -33,6 +33,7 @@ public: public: Pipeline* pipeline_dropout; Pipeline* pipeline_dropout_pack4; + Pipeline* pipeline_dropout_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/eltwise_vulkan.cpp b/src/layer/vulkan/eltwise_vulkan.cpp index 277aaf6e4..76e509ab7 100644 --- a/src/layer/vulkan/eltwise_vulkan.cpp +++ b/src/layer/vulkan/eltwise_vulkan.cpp @@ -27,6 +27,8 @@ Eltwise_vulkan::Eltwise_vulkan() pipeline_eltwise[1] = 0; pipeline_eltwise_pack4[0] = 0; pipeline_eltwise_pack4[1] = 0; + pipeline_eltwise_pack8[0] = 0; + pipeline_eltwise_pack8[1] = 0; } int Eltwise_vulkan::create_pipeline(const Option& opt) @@ -55,6 +57,16 @@ int Eltwise_vulkan::create_pipeline(const Option& opt) pipeline_eltwise_pack4[1]->create("eltwise_pack4", opt, specializations, 3, 5+2); } + // pack8 + { + pipeline_eltwise_pack8[0] = new Pipeline(vkdev); + pipeline_eltwise_pack8[0]->set_optimal_local_size_xyz(); + pipeline_eltwise_pack8[0]->create("eltwise_pack8", opt, specializations, 3, 5+2); + pipeline_eltwise_pack8[1] = new Pipeline(vkdev); + pipeline_eltwise_pack8[1]->set_optimal_local_size_xyz(); + pipeline_eltwise_pack8[1]->create("eltwise_pack8", opt, specializations, 3, 5+2); + } + return 0; } @@ -70,6 +82,11 @@ int Eltwise_vulkan::destroy_pipeline(const Option& /*opt*/) pipeline_eltwise_pack4[0] = 0; pipeline_eltwise_pack4[1] = 0; + delete pipeline_eltwise_pack8[0]; + delete pipeline_eltwise_pack8[1]; + pipeline_eltwise_pack8[0] = 0; + pipeline_eltwise_pack8[1] = 0; + return 0; } @@ -103,7 +120,9 @@ int Eltwise_vulkan::forward(const std::vector& bottom_blobs, std::vector< constants[5].f = coeffs.w == 0 ? 1.f : coeffs[0]; constants[6].f = coeffs.w == 0 ? 1.f : coeffs[1]; - const Pipeline* pipeline = elempack == 4 ? pipeline_eltwise_pack4[1] : pipeline_eltwise[1]; + const Pipeline* pipeline = elempack == 8 ? pipeline_eltwise_pack8[1] + : elempack == 4 ? pipeline_eltwise_pack4[1] + : pipeline_eltwise[1]; cmd.record_pipeline(pipeline, bindings, constants, top_blob); @@ -123,7 +142,9 @@ int Eltwise_vulkan::forward(const std::vector& bottom_blobs, std::vector< constants[5].f = 1.f; constants[6].f = coeffs.w == 0 ? 1 : coeffs[b]; - const Pipeline* pipeline = elempack == 4 ? pipeline_eltwise_pack4[b%2] : pipeline_eltwise[b%2]; + const Pipeline* pipeline = elempack == 8 ? pipeline_eltwise_pack8[b%2] + : elempack == 4 ? pipeline_eltwise_pack4[b%2] + : pipeline_eltwise[b%2]; cmd.record_pipeline(pipeline, bindings, constants, top_blob); } diff --git a/src/layer/vulkan/eltwise_vulkan.h b/src/layer/vulkan/eltwise_vulkan.h index aba843366..c97df3ffa 100644 --- a/src/layer/vulkan/eltwise_vulkan.h +++ b/src/layer/vulkan/eltwise_vulkan.h @@ -33,6 +33,7 @@ public: public: Pipeline* pipeline_eltwise[2]; Pipeline* pipeline_eltwise_pack4[2]; + Pipeline* pipeline_eltwise_pack8[2]; }; } // namespace ncnn diff --git a/src/layer/vulkan/hardsigmoid_vulkan.cpp b/src/layer/vulkan/hardsigmoid_vulkan.cpp index a5e42d72c..32c4363df 100644 --- a/src/layer/vulkan/hardsigmoid_vulkan.cpp +++ b/src/layer/vulkan/hardsigmoid_vulkan.cpp @@ -24,6 +24,7 @@ HardSigmoid_vulkan::HardSigmoid_vulkan() pipeline_hardsigmoid = 0; pipeline_hardsigmoid_pack4 = 0; + pipeline_hardsigmoid_pack8 = 0; } int HardSigmoid_vulkan::create_pipeline(const Option& opt) @@ -46,6 +47,13 @@ int HardSigmoid_vulkan::create_pipeline(const Option& opt) pipeline_hardsigmoid_pack4->create("hardsigmoid_pack4", opt, specializations, 1, 5); } + // pack8 + { + pipeline_hardsigmoid_pack8 = new Pipeline(vkdev); + pipeline_hardsigmoid_pack8->set_optimal_local_size_xyz(); + pipeline_hardsigmoid_pack8->create("hardsigmoid_pack8", opt, specializations, 1, 5); + } + return 0; } @@ -57,6 +65,9 @@ int HardSigmoid_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_hardsigmoid_pack4; pipeline_hardsigmoid_pack4 = 0; + delete pipeline_hardsigmoid_pack8; + pipeline_hardsigmoid_pack8 = 0; + return 0; } @@ -74,7 +85,9 @@ int HardSigmoid_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, constants[3].i = bottom_top_blob.c; constants[4].i = bottom_top_blob.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_hardsigmoid_pack4 : pipeline_hardsigmoid; + const Pipeline* pipeline = elempack == 8 ? pipeline_hardsigmoid_pack8 + : elempack == 4 ? pipeline_hardsigmoid_pack4 + : pipeline_hardsigmoid; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); diff --git a/src/layer/vulkan/hardsigmoid_vulkan.h b/src/layer/vulkan/hardsigmoid_vulkan.h index 001dc794b..dbdc27fff 100644 --- a/src/layer/vulkan/hardsigmoid_vulkan.h +++ b/src/layer/vulkan/hardsigmoid_vulkan.h @@ -33,6 +33,7 @@ public: public: Pipeline* pipeline_hardsigmoid; Pipeline* pipeline_hardsigmoid_pack4; + Pipeline* pipeline_hardsigmoid_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/hardswish_vulkan.cpp b/src/layer/vulkan/hardswish_vulkan.cpp index d5cbb4360..e3771649b 100644 --- a/src/layer/vulkan/hardswish_vulkan.cpp +++ b/src/layer/vulkan/hardswish_vulkan.cpp @@ -24,6 +24,7 @@ HardSwish_vulkan::HardSwish_vulkan() pipeline_hardswish = 0; pipeline_hardswish_pack4 = 0; + pipeline_hardswish_pack8 = 0; } int HardSwish_vulkan::create_pipeline(const Option& opt) @@ -46,6 +47,13 @@ int HardSwish_vulkan::create_pipeline(const Option& opt) pipeline_hardswish_pack4->create("hardswish_pack4", opt, specializations, 1, 5); } + // pack8 + { + pipeline_hardswish_pack8 = new Pipeline(vkdev); + pipeline_hardswish_pack8->set_optimal_local_size_xyz(); + pipeline_hardswish_pack8->create("hardswish_pack8", opt, specializations, 1, 5); + } + return 0; } @@ -57,6 +65,9 @@ int HardSwish_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_hardswish_pack4; pipeline_hardswish_pack4 = 0; + delete pipeline_hardswish_pack8; + pipeline_hardswish_pack8 = 0; + return 0; } @@ -74,7 +85,9 @@ int HardSwish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co constants[3].i = bottom_top_blob.c; constants[4].i = bottom_top_blob.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_hardswish_pack4 : pipeline_hardswish; + const Pipeline* pipeline = elempack == 8 ? pipeline_hardswish_pack8 + : elempack == 4 ? pipeline_hardswish_pack4 + : pipeline_hardswish; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); diff --git a/src/layer/vulkan/hardswish_vulkan.h b/src/layer/vulkan/hardswish_vulkan.h index 3dd60b7ff..b1ce6a76a 100644 --- a/src/layer/vulkan/hardswish_vulkan.h +++ b/src/layer/vulkan/hardswish_vulkan.h @@ -33,6 +33,7 @@ public: public: Pipeline* pipeline_hardswish; Pipeline* pipeline_hardswish_pack4; + Pipeline* pipeline_hardswish_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/lrn_vulkan.cpp b/src/layer/vulkan/lrn_vulkan.cpp index 60508646c..b3e4d043c 100644 --- a/src/layer/vulkan/lrn_vulkan.cpp +++ b/src/layer/vulkan/lrn_vulkan.cpp @@ -29,6 +29,10 @@ LRN_vulkan::LRN_vulkan() pipeline_lrn_norm_across_channel_pack4 = 0; pipeline_lrn_square_pad_within_channel_pack4 = 0; pipeline_lrn_norm_within_channel_pack4 = 0; + pipeline_lrn_square_pad_across_channel_pack8 = 0; + pipeline_lrn_norm_across_channel_pack8 = 0; + pipeline_lrn_square_pad_within_channel_pack8 = 0; + pipeline_lrn_norm_within_channel_pack8 = 0; } int LRN_vulkan::create_pipeline(const Option& opt) @@ -67,6 +71,20 @@ int LRN_vulkan::create_pipeline(const Option& opt) pipeline_lrn_square_pad_within_channel_pack4->set_optimal_local_size_xyz(); pipeline_lrn_square_pad_within_channel_pack4->create("lrn_square_pad_within_channel_pack4", opt, specializations, 2, 10); } + + // pack8 + if (region_type == 0) + { + pipeline_lrn_square_pad_across_channel_pack8 = new Pipeline(vkdev); + pipeline_lrn_square_pad_across_channel_pack8->set_optimal_local_size_xyz(); + pipeline_lrn_square_pad_across_channel_pack8->create("lrn_square_pad_across_channel_pack8", opt, specializations, 2, 10); + } + if (region_type == 1) + { + pipeline_lrn_square_pad_within_channel_pack8 = new Pipeline(vkdev); + pipeline_lrn_square_pad_within_channel_pack8->set_optimal_local_size_xyz(); + pipeline_lrn_square_pad_within_channel_pack8->create("lrn_square_pad_within_channel_pack8", opt, specializations, 2, 10); + } } { @@ -95,6 +113,20 @@ int LRN_vulkan::create_pipeline(const Option& opt) pipeline_lrn_norm_within_channel_pack4->set_optimal_local_size_xyz(); pipeline_lrn_norm_within_channel_pack4->create("lrn_norm_within_channel_pack4", opt, specializations, 2, 10); } + + // pack8 + if (region_type == 0) + { + pipeline_lrn_norm_across_channel_pack8 = new Pipeline(vkdev); + pipeline_lrn_norm_across_channel_pack8->set_optimal_local_size_xyz(); + pipeline_lrn_norm_across_channel_pack8->create("lrn_norm_across_channel_pack8", opt, specializations, 2, 10); + } + if (region_type == 1) + { + pipeline_lrn_norm_within_channel_pack8 = new Pipeline(vkdev); + pipeline_lrn_norm_within_channel_pack8->set_optimal_local_size_xyz(); + pipeline_lrn_norm_within_channel_pack8->create("lrn_norm_within_channel_pack8", opt, specializations, 2, 10); + } } return 0; @@ -120,6 +152,18 @@ int LRN_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_lrn_norm_within_channel_pack4; pipeline_lrn_norm_within_channel_pack4 = 0; + delete pipeline_lrn_square_pad_across_channel_pack8; + pipeline_lrn_square_pad_across_channel_pack8 = 0; + + delete pipeline_lrn_norm_across_channel_pack8; + pipeline_lrn_norm_across_channel_pack8 = 0; + + delete pipeline_lrn_square_pad_within_channel_pack8; + pipeline_lrn_square_pad_within_channel_pack8 = 0; + + delete pipeline_lrn_norm_within_channel_pack8; + pipeline_lrn_norm_within_channel_pack8 = 0; + return 0; } @@ -167,7 +211,12 @@ int LRN_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Op constants[9].i = square_workspace.cstep; const Pipeline* pipeline = 0; - if (elempack == 4) + if (elempack == 8) + { + if (region_type == 0) pipeline = pipeline_lrn_square_pad_across_channel_pack8; + if (region_type == 1) pipeline = pipeline_lrn_square_pad_within_channel_pack8; + } + else if (elempack == 4) { if (region_type == 0) pipeline = pipeline_lrn_square_pad_across_channel_pack4; if (region_type == 1) pipeline = pipeline_lrn_square_pad_within_channel_pack4; @@ -199,7 +248,12 @@ int LRN_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Op constants[9].i = bottom_top_blob.cstep; const Pipeline* pipeline = 0; - if (elempack == 4) + if (elempack == 8) + { + if (region_type == 0) pipeline = pipeline_lrn_norm_across_channel_pack8; + if (region_type == 1) pipeline = pipeline_lrn_norm_within_channel_pack8; + } + else if (elempack == 4) { if (region_type == 0) pipeline = pipeline_lrn_norm_across_channel_pack4; if (region_type == 1) pipeline = pipeline_lrn_norm_within_channel_pack4; diff --git a/src/layer/vulkan/lrn_vulkan.h b/src/layer/vulkan/lrn_vulkan.h index 81e0b6fee..80d664f1f 100644 --- a/src/layer/vulkan/lrn_vulkan.h +++ b/src/layer/vulkan/lrn_vulkan.h @@ -37,6 +37,10 @@ public: Pipeline* pipeline_lrn_norm_across_channel_pack4; Pipeline* pipeline_lrn_square_pad_within_channel_pack4; Pipeline* pipeline_lrn_norm_within_channel_pack4; + Pipeline* pipeline_lrn_square_pad_across_channel_pack8; + Pipeline* pipeline_lrn_norm_across_channel_pack8; + Pipeline* pipeline_lrn_square_pad_within_channel_pack8; + Pipeline* pipeline_lrn_norm_within_channel_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/normalize_vulkan.cpp b/src/layer/vulkan/normalize_vulkan.cpp index e058de193..d8b864575 100644 --- a/src/layer/vulkan/normalize_vulkan.cpp +++ b/src/layer/vulkan/normalize_vulkan.cpp @@ -33,6 +33,12 @@ Normalize_vulkan::Normalize_vulkan() pipeline_normalize_reduce_sum4_fp32_pack4[1] = 0; pipeline_normalize_coeffs_pack4 = 0; pipeline_normalize_norm_pack4 = 0; + + pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 = 0; + pipeline_normalize_reduce_sum4_fp32_pack8[0] = 0; + pipeline_normalize_reduce_sum4_fp32_pack8[1] = 0; + pipeline_normalize_coeffs_pack8 = 0; + pipeline_normalize_norm_pack8 = 0; } int Normalize_vulkan::create_pipeline(const Option& opt) @@ -65,6 +71,18 @@ int Normalize_vulkan::create_pipeline(const Option& opt) pipeline_normalize_reduce_sum4_fp32_pack4[1] = new Pipeline(vkdev); pipeline_normalize_reduce_sum4_fp32_pack4[1]->set_optimal_local_size_xyz(); pipeline_normalize_reduce_sum4_fp32_pack4[1]->create("normalize_reduce_sum4_fp32_pack4", opt, specializations, 2, 6); + + // pack8 + pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 = new Pipeline(vkdev); + pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8->set_optimal_local_size_xyz(); + pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8->create("normalize_reduce_sum4_fp16_to_fp32_pack8", opt, specializations, 2, 6); + + pipeline_normalize_reduce_sum4_fp32_pack8[0] = new Pipeline(vkdev); + pipeline_normalize_reduce_sum4_fp32_pack8[0]->set_optimal_local_size_xyz(); + pipeline_normalize_reduce_sum4_fp32_pack8[0]->create("normalize_reduce_sum4_fp32_pack8", opt, specializations, 2, 6); + pipeline_normalize_reduce_sum4_fp32_pack8[1] = new Pipeline(vkdev); + pipeline_normalize_reduce_sum4_fp32_pack8[1]->set_optimal_local_size_xyz(); + pipeline_normalize_reduce_sum4_fp32_pack8[1]->create("normalize_reduce_sum4_fp32_pack8", opt, specializations, 2, 6); } { @@ -79,6 +97,10 @@ int Normalize_vulkan::create_pipeline(const Option& opt) pipeline_normalize_coeffs_pack4 = new Pipeline(vkdev); pipeline_normalize_coeffs_pack4->set_optimal_local_size_xyz(); pipeline_normalize_coeffs_pack4->create("normalize_coeffs_pack4", opt, specializations, 2, 3); + + pipeline_normalize_coeffs_pack8 = new Pipeline(vkdev); + pipeline_normalize_coeffs_pack8->set_optimal_local_size_xyz(); + pipeline_normalize_coeffs_pack8->create("normalize_coeffs_pack8", opt, specializations, 2, 3); } { @@ -95,6 +117,10 @@ int Normalize_vulkan::create_pipeline(const Option& opt) pipeline_normalize_norm_pack4 = new Pipeline(vkdev); pipeline_normalize_norm_pack4->set_optimal_local_size_xyz(); pipeline_normalize_norm_pack4->create("normalize_norm_pack4", opt, specializations, 3, 5); + + pipeline_normalize_norm_pack8 = new Pipeline(vkdev); + pipeline_normalize_norm_pack8->set_optimal_local_size_xyz(); + pipeline_normalize_norm_pack8->create("normalize_norm_pack8", opt, specializations, 3, 5); } return 0; @@ -118,18 +144,32 @@ int Normalize_vulkan::destroy_pipeline(const Option& /*opt*/) pipeline_normalize_reduce_sum4_fp32_pack4[0] = 0; pipeline_normalize_reduce_sum4_fp32_pack4[1] = 0; + delete pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8; + pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 = 0; + + delete pipeline_normalize_reduce_sum4_fp32_pack8[0]; + delete pipeline_normalize_reduce_sum4_fp32_pack8[1]; + pipeline_normalize_reduce_sum4_fp32_pack8[0] = 0; + pipeline_normalize_reduce_sum4_fp32_pack8[1] = 0; + delete pipeline_normalize_coeffs; pipeline_normalize_coeffs = 0; delete pipeline_normalize_coeffs_pack4; pipeline_normalize_coeffs_pack4 = 0; + delete pipeline_normalize_coeffs_pack8; + pipeline_normalize_coeffs_pack8 = 0; + delete pipeline_normalize_norm; pipeline_normalize_norm = 0; delete pipeline_normalize_norm_pack4; pipeline_normalize_norm_pack4 = 0; + delete pipeline_normalize_norm_pack8; + pipeline_normalize_norm_pack8 = 0; + return 0; } @@ -141,16 +181,6 @@ int Normalize_vulkan::upload_model(VkTransfer& cmd, const Option& opt) Mat scale_data4(4); scale_data4.fill(scale_data[0]); cmd.record_upload(scale_data4, scale_data_gpu, opt); - - Mat scale_data_pack4; - convert_packing(scale_data4, scale_data_pack4, 4); - cmd.record_upload(scale_data_pack4, scale_data_gpu_pack4, opt); - } - else if (scale_data_size % 4 == 0) - { - Mat scale_data_pack4; - convert_packing(scale_data, scale_data_pack4, 4); - cmd.record_upload(scale_data_pack4, scale_data_gpu_pack4, opt); } else { @@ -209,7 +239,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co constants[4].i = sqsum_workspace.c; constants[5].i = sqsum_workspace.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_reduce_sum4_fp16_to_fp32_pack4 : pipeline_normalize_reduce_sum4_fp16_to_fp32; + const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 + : elempack == 4 ? pipeline_normalize_reduce_sum4_fp16_to_fp32_pack4 + : pipeline_normalize_reduce_sum4_fp16_to_fp32; cmd.record_pipeline(pipeline, bindings, constants, sqsum_workspace); } @@ -257,7 +289,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co constants[4].i = sqsum_workspace_reduced.c; constants[5].i = sqsum_workspace_reduced.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_reduce_sum4_fp32_pack4[pb%2] : pipeline_normalize_reduce_sum4_fp32[pb%2]; + const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_reduce_sum4_fp32_pack8[pb%2] + : elempack == 4 ? pipeline_normalize_reduce_sum4_fp32_pack4[pb%2] + : pipeline_normalize_reduce_sum4_fp32[pb%2]; cmd.record_pipeline(pipeline, bindings, constants, sqsum_workspace_reduced); @@ -281,7 +315,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co constants[1].i = sqsum_workspace.c; constants[2].i = sqsum_workspace.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_coeffs_pack4 : pipeline_normalize_coeffs; + const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_coeffs_pack8 + : elempack == 4 ? pipeline_normalize_coeffs_pack4 + : pipeline_normalize_coeffs; cmd.record_pipeline(pipeline, bindings, constants, coeffs_workspace); } @@ -291,7 +327,7 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co std::vector bindings(3); bindings[0] = bottom_top_blob; bindings[1] = coeffs_workspace; - bindings[2] = (scale_data_size == 1 && scale_data[0] == 1.f) ? coeffs_workspace : elempack == 4 ? scale_data_gpu_pack4 : scale_data_gpu; + bindings[2] = (scale_data_size == 1 && scale_data[0] == 1.f) ? coeffs_workspace : scale_data_gpu; std::vector constants(5); constants[0].i = bottom_top_blob.dims; @@ -300,7 +336,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co constants[3].i = bottom_top_blob.c; constants[4].i = bottom_top_blob.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_norm_pack4 : pipeline_normalize_norm; + const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_norm_pack8 + : elempack == 4 ? pipeline_normalize_norm_pack4 + : pipeline_normalize_norm; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); } diff --git a/src/layer/vulkan/normalize_vulkan.h b/src/layer/vulkan/normalize_vulkan.h index 604fe1d33..427878f6a 100644 --- a/src/layer/vulkan/normalize_vulkan.h +++ b/src/layer/vulkan/normalize_vulkan.h @@ -39,11 +39,15 @@ public: Pipeline* pipeline_normalize_coeffs; Pipeline* pipeline_normalize_norm; - VkMat scale_data_gpu_pack4; Pipeline* pipeline_normalize_reduce_sum4_fp16_to_fp32_pack4; Pipeline* pipeline_normalize_reduce_sum4_fp32_pack4[2]; Pipeline* pipeline_normalize_coeffs_pack4; Pipeline* pipeline_normalize_norm_pack4; + + Pipeline* pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8; + Pipeline* pipeline_normalize_reduce_sum4_fp32_pack8[2]; + Pipeline* pipeline_normalize_coeffs_pack8; + Pipeline* pipeline_normalize_norm_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/padding_vulkan.cpp b/src/layer/vulkan/padding_vulkan.cpp index c1f856e60..29e89bdf0 100644 --- a/src/layer/vulkan/padding_vulkan.cpp +++ b/src/layer/vulkan/padding_vulkan.cpp @@ -24,6 +24,7 @@ Padding_vulkan::Padding_vulkan() pipeline_padding = 0; pipeline_padding_pack4 = 0; + pipeline_padding_pack8 = 0; } int Padding_vulkan::create_pipeline(const Option& opt) @@ -47,6 +48,13 @@ int Padding_vulkan::create_pipeline(const Option& opt) pipeline_padding_pack4->create("padding_pack4", opt, specializations, 3, 12); } + // pack8 + { + pipeline_padding_pack8 = new Pipeline(vkdev); + pipeline_padding_pack8->set_optimal_local_size_xyz(); + pipeline_padding_pack8->create("padding_pack8", opt, specializations, 3, 12); + } + return 0; } @@ -58,6 +66,9 @@ int Padding_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_padding_pack4; pipeline_padding_pack4 = 0; + delete pipeline_padding_pack8; + pipeline_padding_pack8 = 0; + return 0; } @@ -66,19 +77,7 @@ int Padding_vulkan::upload_model(VkTransfer& cmd, const Option& opt) if (per_channel_pad_data_size == 0) return 0; - // pack1 - if (per_channel_pad_data_size % 4 != 0) - { - cmd.record_upload(per_channel_pad_data, per_channel_pad_data_gpu, opt); - } - - // pack4 - if (per_channel_pad_data_size % 4 == 0) - { - Mat per_channel_pad_data_pack4; - convert_packing(per_channel_pad_data, per_channel_pad_data_pack4, 4); - cmd.record_upload(per_channel_pad_data_pack4, per_channel_pad_data_gpu_pack4, opt); - } + cmd.record_upload(per_channel_pad_data, per_channel_pad_data_gpu, opt); return 0; } @@ -109,7 +108,7 @@ int Padding_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute std::vector bindings(3); bindings[0] = bottom_blob; bindings[1] = top_blob; - bindings[2] = per_channel_pad_data_size ? (elempack == 4 ? per_channel_pad_data_gpu_pack4 : per_channel_pad_data_gpu) : top_blob;// TODO use dummy buffer + bindings[2] = per_channel_pad_data_size ? per_channel_pad_data_gpu : top_blob;// TODO use dummy buffer std::vector constants(12); constants[0].i = bottom_blob.dims; @@ -125,7 +124,9 @@ int Padding_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute constants[10].i = left; constants[11].i = top; - const Pipeline* pipeline = elempack == 4 ? pipeline_padding_pack4 : pipeline_padding; + const Pipeline* pipeline = elempack == 8 ? pipeline_padding_pack8 + : elempack == 4 ? pipeline_padding_pack4 + : pipeline_padding; cmd.record_pipeline(pipeline, bindings, constants, top_blob); @@ -176,7 +177,7 @@ int Padding_vulkan::forward(const std::vector& bottom_blobs, std::vector< std::vector bindings(3); bindings[0] = bottom_blob; bindings[1] = top_blob; - bindings[2] = per_channel_pad_data_size ? (elempack == 4 ? per_channel_pad_data_gpu_pack4 : per_channel_pad_data_gpu) : top_blob;// TODO use dummy buffer + bindings[2] = per_channel_pad_data_size ? per_channel_pad_data_gpu : top_blob;// TODO use dummy buffer std::vector constants(12); constants[0].i = bottom_blob.dims; @@ -192,7 +193,9 @@ int Padding_vulkan::forward(const std::vector& bottom_blobs, std::vector< constants[10].i = _left; constants[11].i = _top; - const Pipeline* pipeline = elempack == 4 ? pipeline_padding_pack4 : pipeline_padding; + const Pipeline* pipeline = elempack == 8 ? pipeline_padding_pack8 + : elempack == 4 ? pipeline_padding_pack4 + : pipeline_padding; cmd.record_pipeline(pipeline, bindings, constants, top_blob); diff --git a/src/layer/vulkan/padding_vulkan.h b/src/layer/vulkan/padding_vulkan.h index b8a833aec..171426769 100644 --- a/src/layer/vulkan/padding_vulkan.h +++ b/src/layer/vulkan/padding_vulkan.h @@ -37,9 +37,8 @@ public: public: VkMat per_channel_pad_data_gpu; Pipeline* pipeline_padding; - - VkMat per_channel_pad_data_gpu_pack4; Pipeline* pipeline_padding_pack4; + Pipeline* pipeline_padding_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/pooling_vulkan.cpp b/src/layer/vulkan/pooling_vulkan.cpp index 09efd2176..282c5c529 100644 --- a/src/layer/vulkan/pooling_vulkan.cpp +++ b/src/layer/vulkan/pooling_vulkan.cpp @@ -27,9 +27,11 @@ Pooling_vulkan::Pooling_vulkan() padding = 0; pipeline_pooling = 0; - pipeline_pooling_global = 0; pipeline_pooling_pack4 = 0; + pipeline_pooling_pack8 = 0; + pipeline_pooling_global = 0; pipeline_pooling_global_pack4 = 0; + pipeline_pooling_global_pack8 = 0; } int Pooling_vulkan::create_pipeline(const Option& opt) @@ -59,34 +61,6 @@ int Pooling_vulkan::create_pipeline(const Option& opt) padding->create_pipeline(opt); } - std::vector specializations(12); - specializations[0].i = pooling_type; - specializations[1].i = kernel_w; - specializations[2].i = kernel_h; - specializations[3].i = stride_w; - specializations[4].i = stride_h; - specializations[5].i = pad_left; - specializations[6].i = pad_right; - specializations[7].i = pad_top; - specializations[8].i = pad_bottom; - specializations[9].i = global_pooling; - specializations[10].i = pad_mode; - specializations[11].i = avgpool_count_include_pad; - - // pack1 - { - pipeline_pooling = new Pipeline(vkdev); - pipeline_pooling->set_optimal_local_size_xyz(); - pipeline_pooling->create("pooling", opt, specializations, 2, 12); - } - - // pack4 - { - pipeline_pooling_pack4 = new Pipeline(vkdev); - pipeline_pooling_pack4->set_optimal_local_size_xyz(); - pipeline_pooling_pack4->create("pooling_pack4", opt, specializations, 2, 12); - } - if (global_pooling) { std::vector specializations(1); @@ -105,6 +79,50 @@ int Pooling_vulkan::create_pipeline(const Option& opt) pipeline_pooling_global_pack4->set_optimal_local_size_xyz(256, 1, 1); pipeline_pooling_global_pack4->create("pooling_global_pack4", opt, specializations, 2, 12); } + + // pack8 + { + pipeline_pooling_global_pack8 = new Pipeline(vkdev); + pipeline_pooling_global_pack8->set_optimal_local_size_xyz(256, 1, 1); + pipeline_pooling_global_pack8->create("pooling_global_pack8", opt, specializations, 2, 12); + } + } + else + { + std::vector specializations(12); + specializations[0].i = pooling_type; + specializations[1].i = kernel_w; + specializations[2].i = kernel_h; + specializations[3].i = stride_w; + specializations[4].i = stride_h; + specializations[5].i = pad_left; + specializations[6].i = pad_right; + specializations[7].i = pad_top; + specializations[8].i = pad_bottom; + specializations[9].i = global_pooling; + specializations[10].i = pad_mode; + specializations[11].i = avgpool_count_include_pad; + + // pack1 + { + pipeline_pooling = new Pipeline(vkdev); + pipeline_pooling->set_optimal_local_size_xyz(); + pipeline_pooling->create("pooling", opt, specializations, 2, 12); + } + + // pack4 + { + pipeline_pooling_pack4 = new Pipeline(vkdev); + pipeline_pooling_pack4->set_optimal_local_size_xyz(); + pipeline_pooling_pack4->create("pooling_pack4", opt, specializations, 2, 12); + } + + // pack8 + { + pipeline_pooling_pack8 = new Pipeline(vkdev); + pipeline_pooling_pack8->set_optimal_local_size_xyz(); + pipeline_pooling_pack8->create("pooling_pack8", opt, specializations, 2, 12); + } } return 0; @@ -125,12 +143,18 @@ int Pooling_vulkan::destroy_pipeline(const Option& opt) delete pipeline_pooling_pack4; pipeline_pooling_pack4 = 0; + delete pipeline_pooling_pack8; + pipeline_pooling_pack8 = 0; + delete pipeline_pooling_global; pipeline_pooling_global = 0; delete pipeline_pooling_global_pack4; pipeline_pooling_global_pack4 = 0; + delete pipeline_pooling_global_pack8; + pipeline_pooling_global_pack8 = 0; + return 0; } @@ -166,7 +190,9 @@ int Pooling_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute constants[10].i = 0; constants[11].i = 0; - const Pipeline* pipeline = elempack == 4 ? pipeline_pooling_global_pack4 : pipeline_pooling_global; + const Pipeline* pipeline = elempack == 8 ? pipeline_pooling_global_pack8 + : elempack == 4 ? pipeline_pooling_global_pack4 + : pipeline_pooling_global; cmd.record_pipeline(pipeline, bindings, constants, top_blob); @@ -298,7 +324,9 @@ int Pooling_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute constants[10].i = wtailpad; constants[11].i = htailpad; - const Pipeline* pipeline = elempack == 4 ? pipeline_pooling_pack4 : pipeline_pooling; + const Pipeline* pipeline = elempack == 8 ? pipeline_pooling_pack8 + : elempack == 4 ? pipeline_pooling_pack4 + : pipeline_pooling; cmd.record_pipeline(pipeline, bindings, constants, top_blob); diff --git a/src/layer/vulkan/pooling_vulkan.h b/src/layer/vulkan/pooling_vulkan.h index 6c22a1c51..cebba78cf 100644 --- a/src/layer/vulkan/pooling_vulkan.h +++ b/src/layer/vulkan/pooling_vulkan.h @@ -34,9 +34,11 @@ public: ncnn::Layer* padding; Pipeline* pipeline_pooling; - Pipeline* pipeline_pooling_global; Pipeline* pipeline_pooling_pack4; + Pipeline* pipeline_pooling_pack8; + Pipeline* pipeline_pooling_global; Pipeline* pipeline_pooling_global_pack4; + Pipeline* pipeline_pooling_global_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/relu_vulkan.cpp b/src/layer/vulkan/relu_vulkan.cpp index 187eca896..2a6b63b3b 100644 --- a/src/layer/vulkan/relu_vulkan.cpp +++ b/src/layer/vulkan/relu_vulkan.cpp @@ -24,6 +24,7 @@ ReLU_vulkan::ReLU_vulkan() pipeline_relu = 0; pipeline_relu_pack4 = 0; + pipeline_relu_pack8 = 0; } int ReLU_vulkan::create_pipeline(const Option& opt) @@ -45,6 +46,13 @@ int ReLU_vulkan::create_pipeline(const Option& opt) pipeline_relu_pack4->create("relu_pack4", opt, specializations, 1, 5); } + // pack8 + { + pipeline_relu_pack8 = new Pipeline(vkdev); + pipeline_relu_pack8->set_optimal_local_size_xyz(); + pipeline_relu_pack8->create("relu_pack8", opt, specializations, 1, 5); + } + return 0; } @@ -56,6 +64,9 @@ int ReLU_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_relu_pack4; pipeline_relu_pack4 = 0; + delete pipeline_relu_pack8; + pipeline_relu_pack8 = 0; + return 0; } @@ -73,7 +84,9 @@ int ReLU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const O constants[3].i = bottom_top_blob.c; constants[4].i = bottom_top_blob.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_relu_pack4 : pipeline_relu; + const Pipeline* pipeline = elempack == 8 ? pipeline_relu_pack8 + : elempack == 4 ? pipeline_relu_pack4 + : pipeline_relu; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); diff --git a/src/layer/vulkan/relu_vulkan.h b/src/layer/vulkan/relu_vulkan.h index 854612396..50aeb411e 100644 --- a/src/layer/vulkan/relu_vulkan.h +++ b/src/layer/vulkan/relu_vulkan.h @@ -33,6 +33,7 @@ public: public: Pipeline* pipeline_relu; Pipeline* pipeline_relu_pack4; + Pipeline* pipeline_relu_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/sigmoid_vulkan.cpp b/src/layer/vulkan/sigmoid_vulkan.cpp index 5f2c3fb2f..1e0a83eda 100644 --- a/src/layer/vulkan/sigmoid_vulkan.cpp +++ b/src/layer/vulkan/sigmoid_vulkan.cpp @@ -25,6 +25,7 @@ Sigmoid_vulkan::Sigmoid_vulkan() pipeline_sigmoid = 0; pipeline_sigmoid_pack4 = 0; + pipeline_sigmoid_pack8 = 0; } int Sigmoid_vulkan::create_pipeline(const Option& opt) @@ -45,6 +46,13 @@ int Sigmoid_vulkan::create_pipeline(const Option& opt) pipeline_sigmoid_pack4->create("sigmoid_pack4", opt, specializations, 1, 5); } + // pack8 + { + pipeline_sigmoid_pack8 = new Pipeline(vkdev); + pipeline_sigmoid_pack8->set_optimal_local_size_xyz(); + pipeline_sigmoid_pack8->create("sigmoid_pack8", opt, specializations, 1, 5); + } + return 0; } @@ -56,6 +64,9 @@ int Sigmoid_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_sigmoid_pack4; pipeline_sigmoid_pack4 = 0; + delete pipeline_sigmoid_pack8; + pipeline_sigmoid_pack8 = 0; + return 0; } @@ -73,7 +84,9 @@ int Sigmoid_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons constants[3].i = bottom_top_blob.c; constants[4].i = bottom_top_blob.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_sigmoid_pack4 : pipeline_sigmoid; + const Pipeline* pipeline = elempack == 8 ? pipeline_sigmoid_pack8 + : elempack == 4 ? pipeline_sigmoid_pack4 + : pipeline_sigmoid; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); diff --git a/src/layer/vulkan/sigmoid_vulkan.h b/src/layer/vulkan/sigmoid_vulkan.h index 4de7d46dc..4d6e50bb6 100644 --- a/src/layer/vulkan/sigmoid_vulkan.h +++ b/src/layer/vulkan/sigmoid_vulkan.h @@ -33,6 +33,7 @@ public: public: Pipeline* pipeline_sigmoid; Pipeline* pipeline_sigmoid_pack4; + Pipeline* pipeline_sigmoid_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/softmax_vulkan.cpp b/src/layer/vulkan/softmax_vulkan.cpp index ff3d0c947..f82fbf3e4 100644 --- a/src/layer/vulkan/softmax_vulkan.cpp +++ b/src/layer/vulkan/softmax_vulkan.cpp @@ -34,6 +34,11 @@ Softmax_vulkan::Softmax_vulkan() pipeline_softmax_exp_sub_max_pack4 = 0; pipeline_softmax_reduce_sum_pack4 = 0; pipeline_softmax_div_sum_pack4 = 0; + + pipeline_softmax_reduce_max_pack8 = 0; + pipeline_softmax_exp_sub_max_pack8 = 0; + pipeline_softmax_reduce_sum_pack8 = 0; + pipeline_softmax_div_sum_pack8 = 0; } int Softmax_vulkan::create_pipeline(const Option& opt) @@ -77,6 +82,24 @@ int Softmax_vulkan::create_pipeline(const Option& opt) pipeline_softmax_div_sum_pack4->create("softmax_div_sum_pack4", opt, specializations, 2, 10); } + // pack8 + { + pipeline_softmax_reduce_max_pack8 = new Pipeline(vkdev); + pipeline_softmax_exp_sub_max_pack8 = new Pipeline(vkdev); + pipeline_softmax_reduce_sum_pack8 = new Pipeline(vkdev); + pipeline_softmax_div_sum_pack8 = new Pipeline(vkdev); + + pipeline_softmax_reduce_max_pack8->set_optimal_local_size_xyz(); + pipeline_softmax_exp_sub_max_pack8->set_optimal_local_size_xyz(); + pipeline_softmax_reduce_sum_pack8->set_optimal_local_size_xyz(); + pipeline_softmax_div_sum_pack8->set_optimal_local_size_xyz(); + + pipeline_softmax_reduce_max_pack8->create("softmax_reduce_max_pack8", opt, specializations, 2, 10); + pipeline_softmax_exp_sub_max_pack8->create("softmax_exp_sub_max_pack8", opt, specializations, 2, 10); + pipeline_softmax_reduce_sum_pack8->create("softmax_reduce_sum_pack8", opt, specializations, 2, 10); + pipeline_softmax_div_sum_pack8->create("softmax_div_sum_pack8", opt, specializations, 2, 10); + } + return 0; } @@ -106,6 +129,18 @@ int Softmax_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_softmax_div_sum_pack4; pipeline_softmax_div_sum_pack4 = 0; + delete pipeline_softmax_reduce_max_pack8; + pipeline_softmax_reduce_max_pack8 = 0; + + delete pipeline_softmax_exp_sub_max_pack8; + pipeline_softmax_exp_sub_max_pack8 = 0; + + delete pipeline_softmax_reduce_sum_pack8; + pipeline_softmax_reduce_sum_pack8 = 0; + + delete pipeline_softmax_div_sum_pack8; + pipeline_softmax_div_sum_pack8 = 0; + return 0; } @@ -170,7 +205,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons constants[8].i = max_workspace.c; constants[9].i = max_workspace.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_reduce_max_pack4 : pipeline_softmax_reduce_max; + const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_reduce_max_pack8 + : elempack == 4 ? pipeline_softmax_reduce_max_pack4 + : pipeline_softmax_reduce_max; cmd.record_pipeline(pipeline, bindings, constants, max_workspace); } @@ -193,7 +230,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons constants[8].i = max_workspace.c; constants[9].i = max_workspace.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_exp_sub_max_pack4 : pipeline_softmax_exp_sub_max; + const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_exp_sub_max_pack8 + : elempack == 4 ? pipeline_softmax_exp_sub_max_pack4 + : pipeline_softmax_exp_sub_max; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); } @@ -216,7 +255,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons constants[8].i = sum_workspace.c; constants[9].i = sum_workspace.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_reduce_sum_pack4 : pipeline_softmax_reduce_sum; + const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_reduce_sum_pack8 + : elempack == 4 ? pipeline_softmax_reduce_sum_pack4 + : pipeline_softmax_reduce_sum; cmd.record_pipeline(pipeline, bindings, constants, sum_workspace); } @@ -239,7 +280,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons constants[8].i = sum_workspace.c; constants[9].i = sum_workspace.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_div_sum_pack4 : pipeline_softmax_div_sum; + const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_div_sum_pack8 + : elempack == 4 ? pipeline_softmax_div_sum_pack4 + : pipeline_softmax_div_sum; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); } diff --git a/src/layer/vulkan/softmax_vulkan.h b/src/layer/vulkan/softmax_vulkan.h index 34d8e70e7..e47f3a1b8 100644 --- a/src/layer/vulkan/softmax_vulkan.h +++ b/src/layer/vulkan/softmax_vulkan.h @@ -40,6 +40,11 @@ public: Pipeline* pipeline_softmax_exp_sub_max_pack4; Pipeline* pipeline_softmax_reduce_sum_pack4; Pipeline* pipeline_softmax_div_sum_pack4; + + Pipeline* pipeline_softmax_reduce_max_pack8; + Pipeline* pipeline_softmax_exp_sub_max_pack8; + Pipeline* pipeline_softmax_reduce_sum_pack8; + Pipeline* pipeline_softmax_div_sum_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/tanh_vulkan.cpp b/src/layer/vulkan/tanh_vulkan.cpp index 7c59be393..76f301957 100644 --- a/src/layer/vulkan/tanh_vulkan.cpp +++ b/src/layer/vulkan/tanh_vulkan.cpp @@ -25,6 +25,7 @@ TanH_vulkan::TanH_vulkan() pipeline_tanh = 0; pipeline_tanh_pack4 = 0; + pipeline_tanh_pack8 = 0; } int TanH_vulkan::create_pipeline(const Option& opt) @@ -45,6 +46,13 @@ int TanH_vulkan::create_pipeline(const Option& opt) pipeline_tanh_pack4->create("tanh_pack4", opt, specializations, 1, 5); } + // pack8 + { + pipeline_tanh_pack8 = new Pipeline(vkdev); + pipeline_tanh_pack8->set_optimal_local_size_xyz(); + pipeline_tanh_pack8->create("tanh_pack8", opt, specializations, 1, 5); + } + return 0; } @@ -56,6 +64,9 @@ int TanH_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_tanh_pack4; pipeline_tanh_pack4 = 0; + delete pipeline_tanh_pack8; + pipeline_tanh_pack8 = 0; + return 0; } @@ -73,7 +84,9 @@ int TanH_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const O constants[3].i = bottom_top_blob.c; constants[4].i = bottom_top_blob.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_tanh_pack4 : pipeline_tanh; + const Pipeline* pipeline = elempack == 8 ? pipeline_tanh_pack8 + : elempack == 4 ? pipeline_tanh_pack4 + : pipeline_tanh; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); diff --git a/src/layer/vulkan/tanh_vulkan.h b/src/layer/vulkan/tanh_vulkan.h index 32d93c1aa..998303c28 100644 --- a/src/layer/vulkan/tanh_vulkan.h +++ b/src/layer/vulkan/tanh_vulkan.h @@ -33,6 +33,7 @@ public: public: Pipeline* pipeline_tanh; Pipeline* pipeline_tanh_pack4; + Pipeline* pipeline_tanh_pack8; }; } // namespace ncnn diff --git a/src/layer/vulkan/unaryop_vulkan.cpp b/src/layer/vulkan/unaryop_vulkan.cpp index b8776cd60..369158ac7 100644 --- a/src/layer/vulkan/unaryop_vulkan.cpp +++ b/src/layer/vulkan/unaryop_vulkan.cpp @@ -24,6 +24,7 @@ UnaryOp_vulkan::UnaryOp_vulkan() pipeline_unaryop = 0; pipeline_unaryop_pack4 = 0; + pipeline_unaryop_pack8 = 0; } int UnaryOp_vulkan::create_pipeline(const Option& opt) @@ -45,6 +46,13 @@ int UnaryOp_vulkan::create_pipeline(const Option& opt) pipeline_unaryop_pack4->create("unaryop_pack4", opt, specializations, 1, 5); } + // pack8 + { + pipeline_unaryop_pack8 = new Pipeline(vkdev); + pipeline_unaryop_pack8->set_optimal_local_size_xyz(); + pipeline_unaryop_pack8->create("unaryop_pack8", opt, specializations, 1, 5); + } + return 0; } @@ -56,6 +64,9 @@ int UnaryOp_vulkan::destroy_pipeline(const Option& /*opt*/) delete pipeline_unaryop_pack4; pipeline_unaryop_pack4 = 0; + delete pipeline_unaryop_pack8; + pipeline_unaryop_pack8 = 0; + return 0; } @@ -73,7 +84,9 @@ int UnaryOp_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons constants[3].i = bottom_top_blob.c; constants[4].i = bottom_top_blob.cstep; - const Pipeline* pipeline = elempack == 4 ? pipeline_unaryop_pack4 : pipeline_unaryop; + const Pipeline* pipeline = elempack == 8 ? pipeline_unaryop_pack8 + : elempack == 4 ? pipeline_unaryop_pack4 + : pipeline_unaryop; cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); diff --git a/src/layer/vulkan/unaryop_vulkan.h b/src/layer/vulkan/unaryop_vulkan.h index ab98043b9..84bfc1827 100644 --- a/src/layer/vulkan/unaryop_vulkan.h +++ b/src/layer/vulkan/unaryop_vulkan.h @@ -33,6 +33,7 @@ public: public: Pipeline* pipeline_unaryop; Pipeline* pipeline_unaryop_pack4; + Pipeline* pipeline_unaryop_pack8; }; } // namespace ncnn