| @@ -24,6 +24,7 @@ AbsVal_vulkan::AbsVal_vulkan() | |||
| pipeline_absval = 0; | |||
| pipeline_absval_pack4 = 0; | |||
| pipeline_absval_pack8 = 0; | |||
| } | |||
| int AbsVal_vulkan::create_pipeline(const Option& opt) | |||
| @@ -44,6 +45,13 @@ int AbsVal_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_absval_pack4->create("absval_pack4", opt, specializations, 1, 5); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_absval_pack8 = new Pipeline(vkdev); | |||
| pipeline_absval_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_absval_pack8->create("absval_pack8", opt, specializations, 1, 5); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -55,6 +63,9 @@ int AbsVal_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_absval_pack4; | |||
| pipeline_absval_pack4 = 0; | |||
| delete pipeline_absval_pack8; | |||
| pipeline_absval_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -72,7 +83,9 @@ int AbsVal_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_absval_pack4 : pipeline_absval; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_absval_pack8 | |||
| : elempack == 4 ? pipeline_absval_pack4 | |||
| : pipeline_absval; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| @@ -33,6 +33,7 @@ public: | |||
| public: | |||
| Pipeline* pipeline_absval; | |||
| Pipeline* pipeline_absval_pack4; | |||
| Pipeline* pipeline_absval_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -24,8 +24,10 @@ Cast_vulkan::Cast_vulkan() | |||
| pipeline_cast_fp32_to_fp16 = 0; | |||
| pipeline_cast_fp32_to_fp16_pack4 = 0; | |||
| pipeline_cast_fp32_to_fp16_pack8 = 0; | |||
| pipeline_cast_fp16_to_fp32 = 0; | |||
| pipeline_cast_fp16_to_fp32_pack4 = 0; | |||
| pipeline_cast_fp16_to_fp32_pack8 = 0; | |||
| } | |||
| int Cast_vulkan::create_pipeline(const Option& opt) | |||
| @@ -47,6 +49,13 @@ int Cast_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_cast_fp32_to_fp16_pack4->set_optimal_local_size_xyz(); | |||
| pipeline_cast_fp32_to_fp16_pack4->create("cast_fp32_to_fp16_pack4", opt, specializations, 2, 10); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_cast_fp32_to_fp16_pack8 = new Pipeline(vkdev); | |||
| pipeline_cast_fp32_to_fp16_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_cast_fp32_to_fp16_pack8->create("cast_fp32_to_fp16_pack8", opt, specializations, 2, 10); | |||
| } | |||
| } | |||
| if (type_from == 2 && type_to == 1) | |||
| @@ -64,6 +73,13 @@ int Cast_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_cast_fp16_to_fp32_pack4->set_optimal_local_size_xyz(); | |||
| pipeline_cast_fp16_to_fp32_pack4->create("cast_fp16_to_fp32_pack4", opt, specializations, 2, 10); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_cast_fp16_to_fp32_pack8 = new Pipeline(vkdev); | |||
| pipeline_cast_fp16_to_fp32_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_cast_fp16_to_fp32_pack8->create("cast_fp16_to_fp32_pack8", opt, specializations, 2, 10); | |||
| } | |||
| } | |||
| return 0; | |||
| @@ -77,12 +93,18 @@ int Cast_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_cast_fp32_to_fp16_pack4; | |||
| pipeline_cast_fp32_to_fp16_pack4 = 0; | |||
| delete pipeline_cast_fp32_to_fp16_pack8; | |||
| pipeline_cast_fp32_to_fp16_pack8 = 0; | |||
| delete pipeline_cast_fp16_to_fp32; | |||
| pipeline_cast_fp16_to_fp32 = 0; | |||
| delete pipeline_cast_fp16_to_fp32_pack4; | |||
| pipeline_cast_fp16_to_fp32_pack4 = 0; | |||
| delete pipeline_cast_fp16_to_fp32_pack8; | |||
| pipeline_cast_fp16_to_fp32_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -159,11 +181,15 @@ int Cast_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& c | |||
| if (type_from == 1 && type_to == 2) | |||
| { | |||
| pipeline = elempack == 4 ? pipeline_cast_fp32_to_fp16_pack4 : pipeline_cast_fp32_to_fp16; | |||
| pipeline = elempack == 8 ? pipeline_cast_fp32_to_fp16_pack8 | |||
| : elempack == 4 ? pipeline_cast_fp32_to_fp16_pack4 | |||
| : pipeline_cast_fp32_to_fp16; | |||
| } | |||
| if (type_from == 2 && type_to == 1) | |||
| { | |||
| pipeline = elempack == 4 ? pipeline_cast_fp16_to_fp32_pack4 : pipeline_cast_fp16_to_fp32; | |||
| pipeline = elempack == 8 ? pipeline_cast_fp16_to_fp32_pack8 | |||
| : elempack == 4 ? pipeline_cast_fp16_to_fp32_pack4 | |||
| : pipeline_cast_fp16_to_fp32; | |||
| } | |||
| // TODO more cast type | |||
| @@ -33,8 +33,10 @@ public: | |||
| public: | |||
| Pipeline* pipeline_cast_fp32_to_fp16; | |||
| Pipeline* pipeline_cast_fp32_to_fp16_pack4; | |||
| Pipeline* pipeline_cast_fp32_to_fp16_pack8; | |||
| Pipeline* pipeline_cast_fp16_to_fp32; | |||
| Pipeline* pipeline_cast_fp16_to_fp32_pack4; | |||
| Pipeline* pipeline_cast_fp16_to_fp32_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -24,6 +24,7 @@ Clip_vulkan::Clip_vulkan() | |||
| pipeline_clip = 0; | |||
| pipeline_clip_pack4 = 0; | |||
| pipeline_clip_pack8 = 0; | |||
| } | |||
| int Clip_vulkan::create_pipeline(const Option& opt) | |||
| @@ -46,6 +47,13 @@ int Clip_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_clip_pack4->create("clip_pack4", opt, specializations, 1, 5); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_clip_pack8 = new Pipeline(vkdev); | |||
| pipeline_clip_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_clip_pack8->create("clip_pack8", opt, specializations, 1, 5); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -57,6 +65,9 @@ int Clip_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_clip_pack4; | |||
| pipeline_clip_pack4 = 0; | |||
| delete pipeline_clip_pack8; | |||
| pipeline_clip_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -74,7 +85,9 @@ int Clip_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const O | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_clip_pack4 : pipeline_clip; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_clip_pack8 | |||
| : elempack == 4 ? pipeline_clip_pack4 | |||
| : pipeline_clip; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| @@ -33,6 +33,7 @@ public: | |||
| public: | |||
| Pipeline* pipeline_clip; | |||
| Pipeline* pipeline_clip_pack4; | |||
| Pipeline* pipeline_clip_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -25,6 +25,7 @@ Dropout_vulkan::Dropout_vulkan() | |||
| pipeline_dropout = 0; | |||
| pipeline_dropout_pack4 = 0; | |||
| pipeline_dropout_pack8 = 0; | |||
| } | |||
| int Dropout_vulkan::create_pipeline(const Option& opt) | |||
| @@ -46,6 +47,13 @@ int Dropout_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_dropout_pack4->create("dropout_pack4", opt, specializations, 1, 5); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_dropout_pack8 = new Pipeline(vkdev); | |||
| pipeline_dropout_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_dropout_pack8->create("dropout_pack8", opt, specializations, 1, 5); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -57,6 +65,9 @@ int Dropout_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_dropout_pack4; | |||
| pipeline_dropout_pack4 = 0; | |||
| delete pipeline_dropout_pack8; | |||
| pipeline_dropout_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -79,7 +90,9 @@ int Dropout_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_dropout_pack4 : pipeline_dropout; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_dropout_pack8 | |||
| : elempack == 4 ? pipeline_dropout_pack4 | |||
| : pipeline_dropout; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| @@ -33,6 +33,7 @@ public: | |||
| public: | |||
| Pipeline* pipeline_dropout; | |||
| Pipeline* pipeline_dropout_pack4; | |||
| Pipeline* pipeline_dropout_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -27,6 +27,8 @@ Eltwise_vulkan::Eltwise_vulkan() | |||
| pipeline_eltwise[1] = 0; | |||
| pipeline_eltwise_pack4[0] = 0; | |||
| pipeline_eltwise_pack4[1] = 0; | |||
| pipeline_eltwise_pack8[0] = 0; | |||
| pipeline_eltwise_pack8[1] = 0; | |||
| } | |||
| int Eltwise_vulkan::create_pipeline(const Option& opt) | |||
| @@ -55,6 +57,16 @@ int Eltwise_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_eltwise_pack4[1]->create("eltwise_pack4", opt, specializations, 3, 5+2); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_eltwise_pack8[0] = new Pipeline(vkdev); | |||
| pipeline_eltwise_pack8[0]->set_optimal_local_size_xyz(); | |||
| pipeline_eltwise_pack8[0]->create("eltwise_pack8", opt, specializations, 3, 5+2); | |||
| pipeline_eltwise_pack8[1] = new Pipeline(vkdev); | |||
| pipeline_eltwise_pack8[1]->set_optimal_local_size_xyz(); | |||
| pipeline_eltwise_pack8[1]->create("eltwise_pack8", opt, specializations, 3, 5+2); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -70,6 +82,11 @@ int Eltwise_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| pipeline_eltwise_pack4[0] = 0; | |||
| pipeline_eltwise_pack4[1] = 0; | |||
| delete pipeline_eltwise_pack8[0]; | |||
| delete pipeline_eltwise_pack8[1]; | |||
| pipeline_eltwise_pack8[0] = 0; | |||
| pipeline_eltwise_pack8[1] = 0; | |||
| return 0; | |||
| } | |||
| @@ -103,7 +120,9 @@ int Eltwise_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector< | |||
| constants[5].f = coeffs.w == 0 ? 1.f : coeffs[0]; | |||
| constants[6].f = coeffs.w == 0 ? 1.f : coeffs[1]; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_eltwise_pack4[1] : pipeline_eltwise[1]; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_eltwise_pack8[1] | |||
| : elempack == 4 ? pipeline_eltwise_pack4[1] | |||
| : pipeline_eltwise[1]; | |||
| cmd.record_pipeline(pipeline, bindings, constants, top_blob); | |||
| @@ -123,7 +142,9 @@ int Eltwise_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector< | |||
| constants[5].f = 1.f; | |||
| constants[6].f = coeffs.w == 0 ? 1 : coeffs[b]; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_eltwise_pack4[b%2] : pipeline_eltwise[b%2]; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_eltwise_pack8[b%2] | |||
| : elempack == 4 ? pipeline_eltwise_pack4[b%2] | |||
| : pipeline_eltwise[b%2]; | |||
| cmd.record_pipeline(pipeline, bindings, constants, top_blob); | |||
| } | |||
| @@ -33,6 +33,7 @@ public: | |||
| public: | |||
| Pipeline* pipeline_eltwise[2]; | |||
| Pipeline* pipeline_eltwise_pack4[2]; | |||
| Pipeline* pipeline_eltwise_pack8[2]; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -24,6 +24,7 @@ HardSigmoid_vulkan::HardSigmoid_vulkan() | |||
| pipeline_hardsigmoid = 0; | |||
| pipeline_hardsigmoid_pack4 = 0; | |||
| pipeline_hardsigmoid_pack8 = 0; | |||
| } | |||
| int HardSigmoid_vulkan::create_pipeline(const Option& opt) | |||
| @@ -46,6 +47,13 @@ int HardSigmoid_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_hardsigmoid_pack4->create("hardsigmoid_pack4", opt, specializations, 1, 5); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_hardsigmoid_pack8 = new Pipeline(vkdev); | |||
| pipeline_hardsigmoid_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_hardsigmoid_pack8->create("hardsigmoid_pack8", opt, specializations, 1, 5); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -57,6 +65,9 @@ int HardSigmoid_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_hardsigmoid_pack4; | |||
| pipeline_hardsigmoid_pack4 = 0; | |||
| delete pipeline_hardsigmoid_pack8; | |||
| pipeline_hardsigmoid_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -74,7 +85,9 @@ int HardSigmoid_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_hardsigmoid_pack4 : pipeline_hardsigmoid; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_hardsigmoid_pack8 | |||
| : elempack == 4 ? pipeline_hardsigmoid_pack4 | |||
| : pipeline_hardsigmoid; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| @@ -33,6 +33,7 @@ public: | |||
| public: | |||
| Pipeline* pipeline_hardsigmoid; | |||
| Pipeline* pipeline_hardsigmoid_pack4; | |||
| Pipeline* pipeline_hardsigmoid_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -24,6 +24,7 @@ HardSwish_vulkan::HardSwish_vulkan() | |||
| pipeline_hardswish = 0; | |||
| pipeline_hardswish_pack4 = 0; | |||
| pipeline_hardswish_pack8 = 0; | |||
| } | |||
| int HardSwish_vulkan::create_pipeline(const Option& opt) | |||
| @@ -46,6 +47,13 @@ int HardSwish_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_hardswish_pack4->create("hardswish_pack4", opt, specializations, 1, 5); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_hardswish_pack8 = new Pipeline(vkdev); | |||
| pipeline_hardswish_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_hardswish_pack8->create("hardswish_pack8", opt, specializations, 1, 5); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -57,6 +65,9 @@ int HardSwish_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_hardswish_pack4; | |||
| pipeline_hardswish_pack4 = 0; | |||
| delete pipeline_hardswish_pack8; | |||
| pipeline_hardswish_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -74,7 +85,9 @@ int HardSwish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_hardswish_pack4 : pipeline_hardswish; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_hardswish_pack8 | |||
| : elempack == 4 ? pipeline_hardswish_pack4 | |||
| : pipeline_hardswish; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| @@ -33,6 +33,7 @@ public: | |||
| public: | |||
| Pipeline* pipeline_hardswish; | |||
| Pipeline* pipeline_hardswish_pack4; | |||
| Pipeline* pipeline_hardswish_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -29,6 +29,10 @@ LRN_vulkan::LRN_vulkan() | |||
| pipeline_lrn_norm_across_channel_pack4 = 0; | |||
| pipeline_lrn_square_pad_within_channel_pack4 = 0; | |||
| pipeline_lrn_norm_within_channel_pack4 = 0; | |||
| pipeline_lrn_square_pad_across_channel_pack8 = 0; | |||
| pipeline_lrn_norm_across_channel_pack8 = 0; | |||
| pipeline_lrn_square_pad_within_channel_pack8 = 0; | |||
| pipeline_lrn_norm_within_channel_pack8 = 0; | |||
| } | |||
| int LRN_vulkan::create_pipeline(const Option& opt) | |||
| @@ -67,6 +71,20 @@ int LRN_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_lrn_square_pad_within_channel_pack4->set_optimal_local_size_xyz(); | |||
| pipeline_lrn_square_pad_within_channel_pack4->create("lrn_square_pad_within_channel_pack4", opt, specializations, 2, 10); | |||
| } | |||
| // pack8 | |||
| if (region_type == 0) | |||
| { | |||
| pipeline_lrn_square_pad_across_channel_pack8 = new Pipeline(vkdev); | |||
| pipeline_lrn_square_pad_across_channel_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_lrn_square_pad_across_channel_pack8->create("lrn_square_pad_across_channel_pack8", opt, specializations, 2, 10); | |||
| } | |||
| if (region_type == 1) | |||
| { | |||
| pipeline_lrn_square_pad_within_channel_pack8 = new Pipeline(vkdev); | |||
| pipeline_lrn_square_pad_within_channel_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_lrn_square_pad_within_channel_pack8->create("lrn_square_pad_within_channel_pack8", opt, specializations, 2, 10); | |||
| } | |||
| } | |||
| { | |||
| @@ -95,6 +113,20 @@ int LRN_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_lrn_norm_within_channel_pack4->set_optimal_local_size_xyz(); | |||
| pipeline_lrn_norm_within_channel_pack4->create("lrn_norm_within_channel_pack4", opt, specializations, 2, 10); | |||
| } | |||
| // pack8 | |||
| if (region_type == 0) | |||
| { | |||
| pipeline_lrn_norm_across_channel_pack8 = new Pipeline(vkdev); | |||
| pipeline_lrn_norm_across_channel_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_lrn_norm_across_channel_pack8->create("lrn_norm_across_channel_pack8", opt, specializations, 2, 10); | |||
| } | |||
| if (region_type == 1) | |||
| { | |||
| pipeline_lrn_norm_within_channel_pack8 = new Pipeline(vkdev); | |||
| pipeline_lrn_norm_within_channel_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_lrn_norm_within_channel_pack8->create("lrn_norm_within_channel_pack8", opt, specializations, 2, 10); | |||
| } | |||
| } | |||
| return 0; | |||
| @@ -120,6 +152,18 @@ int LRN_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_lrn_norm_within_channel_pack4; | |||
| pipeline_lrn_norm_within_channel_pack4 = 0; | |||
| delete pipeline_lrn_square_pad_across_channel_pack8; | |||
| pipeline_lrn_square_pad_across_channel_pack8 = 0; | |||
| delete pipeline_lrn_norm_across_channel_pack8; | |||
| pipeline_lrn_norm_across_channel_pack8 = 0; | |||
| delete pipeline_lrn_square_pad_within_channel_pack8; | |||
| pipeline_lrn_square_pad_within_channel_pack8 = 0; | |||
| delete pipeline_lrn_norm_within_channel_pack8; | |||
| pipeline_lrn_norm_within_channel_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -167,7 +211,12 @@ int LRN_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Op | |||
| constants[9].i = square_workspace.cstep; | |||
| const Pipeline* pipeline = 0; | |||
| if (elempack == 4) | |||
| if (elempack == 8) | |||
| { | |||
| if (region_type == 0) pipeline = pipeline_lrn_square_pad_across_channel_pack8; | |||
| if (region_type == 1) pipeline = pipeline_lrn_square_pad_within_channel_pack8; | |||
| } | |||
| else if (elempack == 4) | |||
| { | |||
| if (region_type == 0) pipeline = pipeline_lrn_square_pad_across_channel_pack4; | |||
| if (region_type == 1) pipeline = pipeline_lrn_square_pad_within_channel_pack4; | |||
| @@ -199,7 +248,12 @@ int LRN_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Op | |||
| constants[9].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = 0; | |||
| if (elempack == 4) | |||
| if (elempack == 8) | |||
| { | |||
| if (region_type == 0) pipeline = pipeline_lrn_norm_across_channel_pack8; | |||
| if (region_type == 1) pipeline = pipeline_lrn_norm_within_channel_pack8; | |||
| } | |||
| else if (elempack == 4) | |||
| { | |||
| if (region_type == 0) pipeline = pipeline_lrn_norm_across_channel_pack4; | |||
| if (region_type == 1) pipeline = pipeline_lrn_norm_within_channel_pack4; | |||
| @@ -37,6 +37,10 @@ public: | |||
| Pipeline* pipeline_lrn_norm_across_channel_pack4; | |||
| Pipeline* pipeline_lrn_square_pad_within_channel_pack4; | |||
| Pipeline* pipeline_lrn_norm_within_channel_pack4; | |||
| Pipeline* pipeline_lrn_square_pad_across_channel_pack8; | |||
| Pipeline* pipeline_lrn_norm_across_channel_pack8; | |||
| Pipeline* pipeline_lrn_square_pad_within_channel_pack8; | |||
| Pipeline* pipeline_lrn_norm_within_channel_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -33,6 +33,12 @@ Normalize_vulkan::Normalize_vulkan() | |||
| pipeline_normalize_reduce_sum4_fp32_pack4[1] = 0; | |||
| pipeline_normalize_coeffs_pack4 = 0; | |||
| pipeline_normalize_norm_pack4 = 0; | |||
| pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 = 0; | |||
| pipeline_normalize_reduce_sum4_fp32_pack8[0] = 0; | |||
| pipeline_normalize_reduce_sum4_fp32_pack8[1] = 0; | |||
| pipeline_normalize_coeffs_pack8 = 0; | |||
| pipeline_normalize_norm_pack8 = 0; | |||
| } | |||
| int Normalize_vulkan::create_pipeline(const Option& opt) | |||
| @@ -65,6 +71,18 @@ int Normalize_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_normalize_reduce_sum4_fp32_pack4[1] = new Pipeline(vkdev); | |||
| pipeline_normalize_reduce_sum4_fp32_pack4[1]->set_optimal_local_size_xyz(); | |||
| pipeline_normalize_reduce_sum4_fp32_pack4[1]->create("normalize_reduce_sum4_fp32_pack4", opt, specializations, 2, 6); | |||
| // pack8 | |||
| pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 = new Pipeline(vkdev); | |||
| pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8->create("normalize_reduce_sum4_fp16_to_fp32_pack8", opt, specializations, 2, 6); | |||
| pipeline_normalize_reduce_sum4_fp32_pack8[0] = new Pipeline(vkdev); | |||
| pipeline_normalize_reduce_sum4_fp32_pack8[0]->set_optimal_local_size_xyz(); | |||
| pipeline_normalize_reduce_sum4_fp32_pack8[0]->create("normalize_reduce_sum4_fp32_pack8", opt, specializations, 2, 6); | |||
| pipeline_normalize_reduce_sum4_fp32_pack8[1] = new Pipeline(vkdev); | |||
| pipeline_normalize_reduce_sum4_fp32_pack8[1]->set_optimal_local_size_xyz(); | |||
| pipeline_normalize_reduce_sum4_fp32_pack8[1]->create("normalize_reduce_sum4_fp32_pack8", opt, specializations, 2, 6); | |||
| } | |||
| { | |||
| @@ -79,6 +97,10 @@ int Normalize_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_normalize_coeffs_pack4 = new Pipeline(vkdev); | |||
| pipeline_normalize_coeffs_pack4->set_optimal_local_size_xyz(); | |||
| pipeline_normalize_coeffs_pack4->create("normalize_coeffs_pack4", opt, specializations, 2, 3); | |||
| pipeline_normalize_coeffs_pack8 = new Pipeline(vkdev); | |||
| pipeline_normalize_coeffs_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_normalize_coeffs_pack8->create("normalize_coeffs_pack8", opt, specializations, 2, 3); | |||
| } | |||
| { | |||
| @@ -95,6 +117,10 @@ int Normalize_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_normalize_norm_pack4 = new Pipeline(vkdev); | |||
| pipeline_normalize_norm_pack4->set_optimal_local_size_xyz(); | |||
| pipeline_normalize_norm_pack4->create("normalize_norm_pack4", opt, specializations, 3, 5); | |||
| pipeline_normalize_norm_pack8 = new Pipeline(vkdev); | |||
| pipeline_normalize_norm_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_normalize_norm_pack8->create("normalize_norm_pack8", opt, specializations, 3, 5); | |||
| } | |||
| return 0; | |||
| @@ -118,18 +144,32 @@ int Normalize_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| pipeline_normalize_reduce_sum4_fp32_pack4[0] = 0; | |||
| pipeline_normalize_reduce_sum4_fp32_pack4[1] = 0; | |||
| delete pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8; | |||
| pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 = 0; | |||
| delete pipeline_normalize_reduce_sum4_fp32_pack8[0]; | |||
| delete pipeline_normalize_reduce_sum4_fp32_pack8[1]; | |||
| pipeline_normalize_reduce_sum4_fp32_pack8[0] = 0; | |||
| pipeline_normalize_reduce_sum4_fp32_pack8[1] = 0; | |||
| delete pipeline_normalize_coeffs; | |||
| pipeline_normalize_coeffs = 0; | |||
| delete pipeline_normalize_coeffs_pack4; | |||
| pipeline_normalize_coeffs_pack4 = 0; | |||
| delete pipeline_normalize_coeffs_pack8; | |||
| pipeline_normalize_coeffs_pack8 = 0; | |||
| delete pipeline_normalize_norm; | |||
| pipeline_normalize_norm = 0; | |||
| delete pipeline_normalize_norm_pack4; | |||
| pipeline_normalize_norm_pack4 = 0; | |||
| delete pipeline_normalize_norm_pack8; | |||
| pipeline_normalize_norm_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -141,16 +181,6 @@ int Normalize_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | |||
| Mat scale_data4(4); | |||
| scale_data4.fill(scale_data[0]); | |||
| cmd.record_upload(scale_data4, scale_data_gpu, opt); | |||
| Mat scale_data_pack4; | |||
| convert_packing(scale_data4, scale_data_pack4, 4); | |||
| cmd.record_upload(scale_data_pack4, scale_data_gpu_pack4, opt); | |||
| } | |||
| else if (scale_data_size % 4 == 0) | |||
| { | |||
| Mat scale_data_pack4; | |||
| convert_packing(scale_data, scale_data_pack4, 4); | |||
| cmd.record_upload(scale_data_pack4, scale_data_gpu_pack4, opt); | |||
| } | |||
| else | |||
| { | |||
| @@ -209,7 +239,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co | |||
| constants[4].i = sqsum_workspace.c; | |||
| constants[5].i = sqsum_workspace.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_reduce_sum4_fp16_to_fp32_pack4 : pipeline_normalize_reduce_sum4_fp16_to_fp32; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 | |||
| : elempack == 4 ? pipeline_normalize_reduce_sum4_fp16_to_fp32_pack4 | |||
| : pipeline_normalize_reduce_sum4_fp16_to_fp32; | |||
| cmd.record_pipeline(pipeline, bindings, constants, sqsum_workspace); | |||
| } | |||
| @@ -257,7 +289,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co | |||
| constants[4].i = sqsum_workspace_reduced.c; | |||
| constants[5].i = sqsum_workspace_reduced.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_reduce_sum4_fp32_pack4[pb%2] : pipeline_normalize_reduce_sum4_fp32[pb%2]; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_reduce_sum4_fp32_pack8[pb%2] | |||
| : elempack == 4 ? pipeline_normalize_reduce_sum4_fp32_pack4[pb%2] | |||
| : pipeline_normalize_reduce_sum4_fp32[pb%2]; | |||
| cmd.record_pipeline(pipeline, bindings, constants, sqsum_workspace_reduced); | |||
| @@ -281,7 +315,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co | |||
| constants[1].i = sqsum_workspace.c; | |||
| constants[2].i = sqsum_workspace.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_coeffs_pack4 : pipeline_normalize_coeffs; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_coeffs_pack8 | |||
| : elempack == 4 ? pipeline_normalize_coeffs_pack4 | |||
| : pipeline_normalize_coeffs; | |||
| cmd.record_pipeline(pipeline, bindings, constants, coeffs_workspace); | |||
| } | |||
| @@ -291,7 +327,7 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co | |||
| std::vector<VkMat> bindings(3); | |||
| bindings[0] = bottom_top_blob; | |||
| bindings[1] = coeffs_workspace; | |||
| bindings[2] = (scale_data_size == 1 && scale_data[0] == 1.f) ? coeffs_workspace : elempack == 4 ? scale_data_gpu_pack4 : scale_data_gpu; | |||
| bindings[2] = (scale_data_size == 1 && scale_data[0] == 1.f) ? coeffs_workspace : scale_data_gpu; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| @@ -300,7 +336,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_norm_pack4 : pipeline_normalize_norm; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_norm_pack8 | |||
| : elempack == 4 ? pipeline_normalize_norm_pack4 | |||
| : pipeline_normalize_norm; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| } | |||
| @@ -39,11 +39,15 @@ public: | |||
| Pipeline* pipeline_normalize_coeffs; | |||
| Pipeline* pipeline_normalize_norm; | |||
| VkMat scale_data_gpu_pack4; | |||
| Pipeline* pipeline_normalize_reduce_sum4_fp16_to_fp32_pack4; | |||
| Pipeline* pipeline_normalize_reduce_sum4_fp32_pack4[2]; | |||
| Pipeline* pipeline_normalize_coeffs_pack4; | |||
| Pipeline* pipeline_normalize_norm_pack4; | |||
| Pipeline* pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8; | |||
| Pipeline* pipeline_normalize_reduce_sum4_fp32_pack8[2]; | |||
| Pipeline* pipeline_normalize_coeffs_pack8; | |||
| Pipeline* pipeline_normalize_norm_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -24,6 +24,7 @@ Padding_vulkan::Padding_vulkan() | |||
| pipeline_padding = 0; | |||
| pipeline_padding_pack4 = 0; | |||
| pipeline_padding_pack8 = 0; | |||
| } | |||
| int Padding_vulkan::create_pipeline(const Option& opt) | |||
| @@ -47,6 +48,13 @@ int Padding_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_padding_pack4->create("padding_pack4", opt, specializations, 3, 12); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_padding_pack8 = new Pipeline(vkdev); | |||
| pipeline_padding_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_padding_pack8->create("padding_pack8", opt, specializations, 3, 12); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -58,6 +66,9 @@ int Padding_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_padding_pack4; | |||
| pipeline_padding_pack4 = 0; | |||
| delete pipeline_padding_pack8; | |||
| pipeline_padding_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -66,19 +77,7 @@ int Padding_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | |||
| if (per_channel_pad_data_size == 0) | |||
| return 0; | |||
| // pack1 | |||
| if (per_channel_pad_data_size % 4 != 0) | |||
| { | |||
| cmd.record_upload(per_channel_pad_data, per_channel_pad_data_gpu, opt); | |||
| } | |||
| // pack4 | |||
| if (per_channel_pad_data_size % 4 == 0) | |||
| { | |||
| Mat per_channel_pad_data_pack4; | |||
| convert_packing(per_channel_pad_data, per_channel_pad_data_pack4, 4); | |||
| cmd.record_upload(per_channel_pad_data_pack4, per_channel_pad_data_gpu_pack4, opt); | |||
| } | |||
| cmd.record_upload(per_channel_pad_data, per_channel_pad_data_gpu, opt); | |||
| return 0; | |||
| } | |||
| @@ -109,7 +108,7 @@ int Padding_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute | |||
| std::vector<VkMat> bindings(3); | |||
| bindings[0] = bottom_blob; | |||
| bindings[1] = top_blob; | |||
| bindings[2] = per_channel_pad_data_size ? (elempack == 4 ? per_channel_pad_data_gpu_pack4 : per_channel_pad_data_gpu) : top_blob;// TODO use dummy buffer | |||
| bindings[2] = per_channel_pad_data_size ? per_channel_pad_data_gpu : top_blob;// TODO use dummy buffer | |||
| std::vector<vk_constant_type> constants(12); | |||
| constants[0].i = bottom_blob.dims; | |||
| @@ -125,7 +124,9 @@ int Padding_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute | |||
| constants[10].i = left; | |||
| constants[11].i = top; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_padding_pack4 : pipeline_padding; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_padding_pack8 | |||
| : elempack == 4 ? pipeline_padding_pack4 | |||
| : pipeline_padding; | |||
| cmd.record_pipeline(pipeline, bindings, constants, top_blob); | |||
| @@ -176,7 +177,7 @@ int Padding_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector< | |||
| std::vector<VkMat> bindings(3); | |||
| bindings[0] = bottom_blob; | |||
| bindings[1] = top_blob; | |||
| bindings[2] = per_channel_pad_data_size ? (elempack == 4 ? per_channel_pad_data_gpu_pack4 : per_channel_pad_data_gpu) : top_blob;// TODO use dummy buffer | |||
| bindings[2] = per_channel_pad_data_size ? per_channel_pad_data_gpu : top_blob;// TODO use dummy buffer | |||
| std::vector<vk_constant_type> constants(12); | |||
| constants[0].i = bottom_blob.dims; | |||
| @@ -192,7 +193,9 @@ int Padding_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector< | |||
| constants[10].i = _left; | |||
| constants[11].i = _top; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_padding_pack4 : pipeline_padding; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_padding_pack8 | |||
| : elempack == 4 ? pipeline_padding_pack4 | |||
| : pipeline_padding; | |||
| cmd.record_pipeline(pipeline, bindings, constants, top_blob); | |||
| @@ -37,9 +37,8 @@ public: | |||
| public: | |||
| VkMat per_channel_pad_data_gpu; | |||
| Pipeline* pipeline_padding; | |||
| VkMat per_channel_pad_data_gpu_pack4; | |||
| Pipeline* pipeline_padding_pack4; | |||
| Pipeline* pipeline_padding_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -27,9 +27,11 @@ Pooling_vulkan::Pooling_vulkan() | |||
| padding = 0; | |||
| pipeline_pooling = 0; | |||
| pipeline_pooling_global = 0; | |||
| pipeline_pooling_pack4 = 0; | |||
| pipeline_pooling_pack8 = 0; | |||
| pipeline_pooling_global = 0; | |||
| pipeline_pooling_global_pack4 = 0; | |||
| pipeline_pooling_global_pack8 = 0; | |||
| } | |||
| int Pooling_vulkan::create_pipeline(const Option& opt) | |||
| @@ -59,34 +61,6 @@ int Pooling_vulkan::create_pipeline(const Option& opt) | |||
| padding->create_pipeline(opt); | |||
| } | |||
| std::vector<vk_specialization_type> specializations(12); | |||
| specializations[0].i = pooling_type; | |||
| specializations[1].i = kernel_w; | |||
| specializations[2].i = kernel_h; | |||
| specializations[3].i = stride_w; | |||
| specializations[4].i = stride_h; | |||
| specializations[5].i = pad_left; | |||
| specializations[6].i = pad_right; | |||
| specializations[7].i = pad_top; | |||
| specializations[8].i = pad_bottom; | |||
| specializations[9].i = global_pooling; | |||
| specializations[10].i = pad_mode; | |||
| specializations[11].i = avgpool_count_include_pad; | |||
| // pack1 | |||
| { | |||
| pipeline_pooling = new Pipeline(vkdev); | |||
| pipeline_pooling->set_optimal_local_size_xyz(); | |||
| pipeline_pooling->create("pooling", opt, specializations, 2, 12); | |||
| } | |||
| // pack4 | |||
| { | |||
| pipeline_pooling_pack4 = new Pipeline(vkdev); | |||
| pipeline_pooling_pack4->set_optimal_local_size_xyz(); | |||
| pipeline_pooling_pack4->create("pooling_pack4", opt, specializations, 2, 12); | |||
| } | |||
| if (global_pooling) | |||
| { | |||
| std::vector<vk_specialization_type> specializations(1); | |||
| @@ -105,6 +79,50 @@ int Pooling_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_pooling_global_pack4->set_optimal_local_size_xyz(256, 1, 1); | |||
| pipeline_pooling_global_pack4->create("pooling_global_pack4", opt, specializations, 2, 12); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_pooling_global_pack8 = new Pipeline(vkdev); | |||
| pipeline_pooling_global_pack8->set_optimal_local_size_xyz(256, 1, 1); | |||
| pipeline_pooling_global_pack8->create("pooling_global_pack8", opt, specializations, 2, 12); | |||
| } | |||
| } | |||
| else | |||
| { | |||
| std::vector<vk_specialization_type> specializations(12); | |||
| specializations[0].i = pooling_type; | |||
| specializations[1].i = kernel_w; | |||
| specializations[2].i = kernel_h; | |||
| specializations[3].i = stride_w; | |||
| specializations[4].i = stride_h; | |||
| specializations[5].i = pad_left; | |||
| specializations[6].i = pad_right; | |||
| specializations[7].i = pad_top; | |||
| specializations[8].i = pad_bottom; | |||
| specializations[9].i = global_pooling; | |||
| specializations[10].i = pad_mode; | |||
| specializations[11].i = avgpool_count_include_pad; | |||
| // pack1 | |||
| { | |||
| pipeline_pooling = new Pipeline(vkdev); | |||
| pipeline_pooling->set_optimal_local_size_xyz(); | |||
| pipeline_pooling->create("pooling", opt, specializations, 2, 12); | |||
| } | |||
| // pack4 | |||
| { | |||
| pipeline_pooling_pack4 = new Pipeline(vkdev); | |||
| pipeline_pooling_pack4->set_optimal_local_size_xyz(); | |||
| pipeline_pooling_pack4->create("pooling_pack4", opt, specializations, 2, 12); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_pooling_pack8 = new Pipeline(vkdev); | |||
| pipeline_pooling_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_pooling_pack8->create("pooling_pack8", opt, specializations, 2, 12); | |||
| } | |||
| } | |||
| return 0; | |||
| @@ -125,12 +143,18 @@ int Pooling_vulkan::destroy_pipeline(const Option& opt) | |||
| delete pipeline_pooling_pack4; | |||
| pipeline_pooling_pack4 = 0; | |||
| delete pipeline_pooling_pack8; | |||
| pipeline_pooling_pack8 = 0; | |||
| delete pipeline_pooling_global; | |||
| pipeline_pooling_global = 0; | |||
| delete pipeline_pooling_global_pack4; | |||
| pipeline_pooling_global_pack4 = 0; | |||
| delete pipeline_pooling_global_pack8; | |||
| pipeline_pooling_global_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -166,7 +190,9 @@ int Pooling_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute | |||
| constants[10].i = 0; | |||
| constants[11].i = 0; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_pooling_global_pack4 : pipeline_pooling_global; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_pooling_global_pack8 | |||
| : elempack == 4 ? pipeline_pooling_global_pack4 | |||
| : pipeline_pooling_global; | |||
| cmd.record_pipeline(pipeline, bindings, constants, top_blob); | |||
| @@ -298,7 +324,9 @@ int Pooling_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute | |||
| constants[10].i = wtailpad; | |||
| constants[11].i = htailpad; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_pooling_pack4 : pipeline_pooling; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_pooling_pack8 | |||
| : elempack == 4 ? pipeline_pooling_pack4 | |||
| : pipeline_pooling; | |||
| cmd.record_pipeline(pipeline, bindings, constants, top_blob); | |||
| @@ -34,9 +34,11 @@ public: | |||
| ncnn::Layer* padding; | |||
| Pipeline* pipeline_pooling; | |||
| Pipeline* pipeline_pooling_global; | |||
| Pipeline* pipeline_pooling_pack4; | |||
| Pipeline* pipeline_pooling_pack8; | |||
| Pipeline* pipeline_pooling_global; | |||
| Pipeline* pipeline_pooling_global_pack4; | |||
| Pipeline* pipeline_pooling_global_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -24,6 +24,7 @@ ReLU_vulkan::ReLU_vulkan() | |||
| pipeline_relu = 0; | |||
| pipeline_relu_pack4 = 0; | |||
| pipeline_relu_pack8 = 0; | |||
| } | |||
| int ReLU_vulkan::create_pipeline(const Option& opt) | |||
| @@ -45,6 +46,13 @@ int ReLU_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_relu_pack4->create("relu_pack4", opt, specializations, 1, 5); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_relu_pack8 = new Pipeline(vkdev); | |||
| pipeline_relu_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_relu_pack8->create("relu_pack8", opt, specializations, 1, 5); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -56,6 +64,9 @@ int ReLU_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_relu_pack4; | |||
| pipeline_relu_pack4 = 0; | |||
| delete pipeline_relu_pack8; | |||
| pipeline_relu_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -73,7 +84,9 @@ int ReLU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const O | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_relu_pack4 : pipeline_relu; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_relu_pack8 | |||
| : elempack == 4 ? pipeline_relu_pack4 | |||
| : pipeline_relu; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| @@ -33,6 +33,7 @@ public: | |||
| public: | |||
| Pipeline* pipeline_relu; | |||
| Pipeline* pipeline_relu_pack4; | |||
| Pipeline* pipeline_relu_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -25,6 +25,7 @@ Sigmoid_vulkan::Sigmoid_vulkan() | |||
| pipeline_sigmoid = 0; | |||
| pipeline_sigmoid_pack4 = 0; | |||
| pipeline_sigmoid_pack8 = 0; | |||
| } | |||
| int Sigmoid_vulkan::create_pipeline(const Option& opt) | |||
| @@ -45,6 +46,13 @@ int Sigmoid_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_sigmoid_pack4->create("sigmoid_pack4", opt, specializations, 1, 5); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_sigmoid_pack8 = new Pipeline(vkdev); | |||
| pipeline_sigmoid_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_sigmoid_pack8->create("sigmoid_pack8", opt, specializations, 1, 5); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -56,6 +64,9 @@ int Sigmoid_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_sigmoid_pack4; | |||
| pipeline_sigmoid_pack4 = 0; | |||
| delete pipeline_sigmoid_pack8; | |||
| pipeline_sigmoid_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -73,7 +84,9 @@ int Sigmoid_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_sigmoid_pack4 : pipeline_sigmoid; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_sigmoid_pack8 | |||
| : elempack == 4 ? pipeline_sigmoid_pack4 | |||
| : pipeline_sigmoid; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| @@ -33,6 +33,7 @@ public: | |||
| public: | |||
| Pipeline* pipeline_sigmoid; | |||
| Pipeline* pipeline_sigmoid_pack4; | |||
| Pipeline* pipeline_sigmoid_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -34,6 +34,11 @@ Softmax_vulkan::Softmax_vulkan() | |||
| pipeline_softmax_exp_sub_max_pack4 = 0; | |||
| pipeline_softmax_reduce_sum_pack4 = 0; | |||
| pipeline_softmax_div_sum_pack4 = 0; | |||
| pipeline_softmax_reduce_max_pack8 = 0; | |||
| pipeline_softmax_exp_sub_max_pack8 = 0; | |||
| pipeline_softmax_reduce_sum_pack8 = 0; | |||
| pipeline_softmax_div_sum_pack8 = 0; | |||
| } | |||
| int Softmax_vulkan::create_pipeline(const Option& opt) | |||
| @@ -77,6 +82,24 @@ int Softmax_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_softmax_div_sum_pack4->create("softmax_div_sum_pack4", opt, specializations, 2, 10); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_softmax_reduce_max_pack8 = new Pipeline(vkdev); | |||
| pipeline_softmax_exp_sub_max_pack8 = new Pipeline(vkdev); | |||
| pipeline_softmax_reduce_sum_pack8 = new Pipeline(vkdev); | |||
| pipeline_softmax_div_sum_pack8 = new Pipeline(vkdev); | |||
| pipeline_softmax_reduce_max_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_softmax_exp_sub_max_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_softmax_reduce_sum_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_softmax_div_sum_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_softmax_reduce_max_pack8->create("softmax_reduce_max_pack8", opt, specializations, 2, 10); | |||
| pipeline_softmax_exp_sub_max_pack8->create("softmax_exp_sub_max_pack8", opt, specializations, 2, 10); | |||
| pipeline_softmax_reduce_sum_pack8->create("softmax_reduce_sum_pack8", opt, specializations, 2, 10); | |||
| pipeline_softmax_div_sum_pack8->create("softmax_div_sum_pack8", opt, specializations, 2, 10); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -106,6 +129,18 @@ int Softmax_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_softmax_div_sum_pack4; | |||
| pipeline_softmax_div_sum_pack4 = 0; | |||
| delete pipeline_softmax_reduce_max_pack8; | |||
| pipeline_softmax_reduce_max_pack8 = 0; | |||
| delete pipeline_softmax_exp_sub_max_pack8; | |||
| pipeline_softmax_exp_sub_max_pack8 = 0; | |||
| delete pipeline_softmax_reduce_sum_pack8; | |||
| pipeline_softmax_reduce_sum_pack8 = 0; | |||
| delete pipeline_softmax_div_sum_pack8; | |||
| pipeline_softmax_div_sum_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -170,7 +205,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons | |||
| constants[8].i = max_workspace.c; | |||
| constants[9].i = max_workspace.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_reduce_max_pack4 : pipeline_softmax_reduce_max; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_reduce_max_pack8 | |||
| : elempack == 4 ? pipeline_softmax_reduce_max_pack4 | |||
| : pipeline_softmax_reduce_max; | |||
| cmd.record_pipeline(pipeline, bindings, constants, max_workspace); | |||
| } | |||
| @@ -193,7 +230,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons | |||
| constants[8].i = max_workspace.c; | |||
| constants[9].i = max_workspace.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_exp_sub_max_pack4 : pipeline_softmax_exp_sub_max; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_exp_sub_max_pack8 | |||
| : elempack == 4 ? pipeline_softmax_exp_sub_max_pack4 | |||
| : pipeline_softmax_exp_sub_max; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| } | |||
| @@ -216,7 +255,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons | |||
| constants[8].i = sum_workspace.c; | |||
| constants[9].i = sum_workspace.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_reduce_sum_pack4 : pipeline_softmax_reduce_sum; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_reduce_sum_pack8 | |||
| : elempack == 4 ? pipeline_softmax_reduce_sum_pack4 | |||
| : pipeline_softmax_reduce_sum; | |||
| cmd.record_pipeline(pipeline, bindings, constants, sum_workspace); | |||
| } | |||
| @@ -239,7 +280,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons | |||
| constants[8].i = sum_workspace.c; | |||
| constants[9].i = sum_workspace.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_div_sum_pack4 : pipeline_softmax_div_sum; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_div_sum_pack8 | |||
| : elempack == 4 ? pipeline_softmax_div_sum_pack4 | |||
| : pipeline_softmax_div_sum; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| } | |||
| @@ -40,6 +40,11 @@ public: | |||
| Pipeline* pipeline_softmax_exp_sub_max_pack4; | |||
| Pipeline* pipeline_softmax_reduce_sum_pack4; | |||
| Pipeline* pipeline_softmax_div_sum_pack4; | |||
| Pipeline* pipeline_softmax_reduce_max_pack8; | |||
| Pipeline* pipeline_softmax_exp_sub_max_pack8; | |||
| Pipeline* pipeline_softmax_reduce_sum_pack8; | |||
| Pipeline* pipeline_softmax_div_sum_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -25,6 +25,7 @@ TanH_vulkan::TanH_vulkan() | |||
| pipeline_tanh = 0; | |||
| pipeline_tanh_pack4 = 0; | |||
| pipeline_tanh_pack8 = 0; | |||
| } | |||
| int TanH_vulkan::create_pipeline(const Option& opt) | |||
| @@ -45,6 +46,13 @@ int TanH_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_tanh_pack4->create("tanh_pack4", opt, specializations, 1, 5); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_tanh_pack8 = new Pipeline(vkdev); | |||
| pipeline_tanh_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_tanh_pack8->create("tanh_pack8", opt, specializations, 1, 5); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -56,6 +64,9 @@ int TanH_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_tanh_pack4; | |||
| pipeline_tanh_pack4 = 0; | |||
| delete pipeline_tanh_pack8; | |||
| pipeline_tanh_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -73,7 +84,9 @@ int TanH_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const O | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_tanh_pack4 : pipeline_tanh; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_tanh_pack8 | |||
| : elempack == 4 ? pipeline_tanh_pack4 | |||
| : pipeline_tanh; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| @@ -33,6 +33,7 @@ public: | |||
| public: | |||
| Pipeline* pipeline_tanh; | |||
| Pipeline* pipeline_tanh_pack4; | |||
| Pipeline* pipeline_tanh_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -24,6 +24,7 @@ UnaryOp_vulkan::UnaryOp_vulkan() | |||
| pipeline_unaryop = 0; | |||
| pipeline_unaryop_pack4 = 0; | |||
| pipeline_unaryop_pack8 = 0; | |||
| } | |||
| int UnaryOp_vulkan::create_pipeline(const Option& opt) | |||
| @@ -45,6 +46,13 @@ int UnaryOp_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_unaryop_pack4->create("unaryop_pack4", opt, specializations, 1, 5); | |||
| } | |||
| // pack8 | |||
| { | |||
| pipeline_unaryop_pack8 = new Pipeline(vkdev); | |||
| pipeline_unaryop_pack8->set_optimal_local_size_xyz(); | |||
| pipeline_unaryop_pack8->create("unaryop_pack8", opt, specializations, 1, 5); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -56,6 +64,9 @@ int UnaryOp_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_unaryop_pack4; | |||
| pipeline_unaryop_pack4 = 0; | |||
| delete pipeline_unaryop_pack8; | |||
| pipeline_unaryop_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -73,7 +84,9 @@ int UnaryOp_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 4 ? pipeline_unaryop_pack4 : pipeline_unaryop; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_unaryop_pack8 | |||
| : elempack == 4 ? pipeline_unaryop_pack4 | |||
| : pipeline_unaryop; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| @@ -33,6 +33,7 @@ public: | |||
| public: | |||
| Pipeline* pipeline_unaryop; | |||
| Pipeline* pipeline_unaryop_pack4; | |||
| Pipeline* pipeline_unaryop_pack8; | |||
| }; | |||
| } // namespace ncnn | |||