| @@ -12,8 +12,6 @@ CELU_vulkan::CELU_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_celu = 0; | |||
| pipeline_celu_pack4 = 0; | |||
| pipeline_celu_pack8 = 0; | |||
| } | |||
| int CELU_vulkan::create_pipeline(const Option& opt) | |||
| @@ -41,63 +39,15 @@ int CELU_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(1 + 5); | |||
| std::vector<vk_specialization_type> specializations(1 + 1); | |||
| specializations[0].f = alpha; | |||
| specializations[1 + 0].i = shape_packed.dims; | |||
| specializations[1 + 1].i = shape_packed.w; | |||
| specializations[1 + 2].i = shape_packed.h * shape_packed.d; | |||
| specializations[1 + 3].i = shape_packed.c; | |||
| specializations[1 + 4].i = shape_packed.cstep; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| if (shape_packed.dims == 4) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_celu = new Pipeline(vkdev); | |||
| pipeline_celu->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_celu->create(LayerShaderType::celu, opt, specializations); | |||
| } | |||
| specializations[1 + 0].u32 = shape_packed.total() * elempack / 4; | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_celu_pack4 = new Pipeline(vkdev); | |||
| pipeline_celu_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_celu_pack4->create(LayerShaderType::celu_pack4, opt, specializations); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_celu_pack8 = new Pipeline(vkdev); | |||
| pipeline_celu_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_celu_pack8->create(LayerShaderType::celu_pack8, opt, specializations); | |||
| } | |||
| pipeline_celu = new Pipeline(vkdev); | |||
| pipeline_celu->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_celu->create(LayerShaderType::celu, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -107,34 +57,24 @@ int CELU_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_celu; | |||
| pipeline_celu = 0; | |||
| delete pipeline_celu_pack4; | |||
| pipeline_celu_pack4 = 0; | |||
| delete pipeline_celu_pack8; | |||
| pipeline_celu_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int CELU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h * bottom_top_blob.d; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_celu_pack8 | |||
| : elempack == 4 ? pipeline_celu_pack4 | |||
| : pipeline_celu; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_celu, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_celu; | |||
| Pipeline* pipeline_celu_pack4; | |||
| Pipeline* pipeline_celu_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -12,8 +12,6 @@ Clip_vulkan::Clip_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_clip = 0; | |||
| pipeline_clip_pack4 = 0; | |||
| pipeline_clip_pack8 = 0; | |||
| } | |||
| int Clip_vulkan::create_pipeline(const Option& opt) | |||
| @@ -41,64 +39,16 @@ int Clip_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(2 + 5); | |||
| std::vector<vk_specialization_type> specializations(2 + 1); | |||
| specializations[0].f = min; | |||
| specializations[1].f = max; | |||
| specializations[2 + 0].i = shape_packed.dims; | |||
| specializations[2 + 1].i = shape_packed.w; | |||
| specializations[2 + 2].i = shape_packed.h * shape_packed.d; | |||
| specializations[2 + 3].i = shape_packed.c; | |||
| specializations[2 + 4].i = shape_packed.cstep; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| if (shape_packed.dims == 4) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_clip = new Pipeline(vkdev); | |||
| pipeline_clip->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_clip->create(LayerShaderType::clip, opt, specializations); | |||
| } | |||
| specializations[2 + 0].u32 = shape_packed.total() * elempack / 4; | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_clip_pack4 = new Pipeline(vkdev); | |||
| pipeline_clip_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_clip_pack4->create(LayerShaderType::clip_pack4, opt, specializations); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_clip_pack8 = new Pipeline(vkdev); | |||
| pipeline_clip_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_clip_pack8->create(LayerShaderType::clip_pack8, opt, specializations); | |||
| } | |||
| pipeline_clip = new Pipeline(vkdev); | |||
| pipeline_clip->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_clip->create(LayerShaderType::clip, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -108,34 +58,24 @@ int Clip_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_clip; | |||
| pipeline_clip = 0; | |||
| delete pipeline_clip_pack4; | |||
| pipeline_clip_pack4 = 0; | |||
| delete pipeline_clip_pack8; | |||
| pipeline_clip_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int Clip_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h * bottom_top_blob.d; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_clip_pack8 | |||
| : elempack == 4 ? pipeline_clip_pack4 | |||
| : pipeline_clip; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_clip, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_clip; | |||
| Pipeline* pipeline_clip_pack4; | |||
| Pipeline* pipeline_clip_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -12,8 +12,6 @@ Dropout_vulkan::Dropout_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_dropout = 0; | |||
| pipeline_dropout_pack4 = 0; | |||
| pipeline_dropout_pack8 = 0; | |||
| } | |||
| int Dropout_vulkan::create_pipeline(const Option& opt) | |||
| @@ -40,57 +38,15 @@ int Dropout_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(1 + 5); | |||
| std::vector<vk_specialization_type> specializations(1 + 1); | |||
| specializations[0].f = scale; | |||
| specializations[1 + 0].i = shape_packed.dims; | |||
| specializations[1 + 1].i = shape_packed.w; | |||
| specializations[1 + 2].i = shape_packed.h; | |||
| specializations[1 + 3].i = shape_packed.c; | |||
| specializations[1 + 4].i = shape_packed.cstep; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_dropout = new Pipeline(vkdev); | |||
| pipeline_dropout->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_dropout->create(LayerShaderType::dropout, opt, specializations); | |||
| } | |||
| specializations[1 + 0].u32 = shape_packed.total() * elempack / 4; | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_dropout_pack4 = new Pipeline(vkdev); | |||
| pipeline_dropout_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_dropout_pack4->create(LayerShaderType::dropout_pack4, opt, specializations); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_dropout_pack8 = new Pipeline(vkdev); | |||
| pipeline_dropout_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_dropout_pack8->create(LayerShaderType::dropout_pack8, opt, specializations); | |||
| } | |||
| pipeline_dropout = new Pipeline(vkdev); | |||
| pipeline_dropout->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_dropout->create(LayerShaderType::dropout, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -100,12 +56,6 @@ int Dropout_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_dropout; | |||
| pipeline_dropout = 0; | |||
| delete pipeline_dropout_pack4; | |||
| pipeline_dropout_pack4 = 0; | |||
| delete pipeline_dropout_pack8; | |||
| pipeline_dropout_pack8 = 0; | |||
| return 0; | |||
| } | |||
| @@ -116,23 +66,19 @@ int Dropout_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons | |||
| return 0; | |||
| } | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_dropout_pack8 | |||
| : elempack == 4 ? pipeline_dropout_pack4 | |||
| : pipeline_dropout; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_dropout, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_dropout; | |||
| Pipeline* pipeline_dropout_pack4; | |||
| Pipeline* pipeline_dropout_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -12,8 +12,6 @@ ELU_vulkan::ELU_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_elu = 0; | |||
| pipeline_elu_pack4 = 0; | |||
| pipeline_elu_pack8 = 0; | |||
| } | |||
| int ELU_vulkan::create_pipeline(const Option& opt) | |||
| @@ -41,63 +39,15 @@ int ELU_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(1 + 5); | |||
| std::vector<vk_specialization_type> specializations(1 + 1); | |||
| specializations[0].f = alpha; | |||
| specializations[1 + 0].i = shape_packed.dims; | |||
| specializations[1 + 1].i = shape_packed.w; | |||
| specializations[1 + 2].i = shape_packed.h * shape_packed.d; | |||
| specializations[1 + 3].i = shape_packed.c; | |||
| specializations[1 + 4].i = shape_packed.cstep; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| if (shape_packed.dims == 4) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_elu = new Pipeline(vkdev); | |||
| pipeline_elu->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_elu->create(LayerShaderType::elu, opt, specializations); | |||
| } | |||
| specializations[1 + 0].u32 = shape_packed.total() * elempack / 4; | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_elu_pack4 = new Pipeline(vkdev); | |||
| pipeline_elu_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_elu_pack4->create(LayerShaderType::elu_pack4, opt, specializations); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_elu_pack8 = new Pipeline(vkdev); | |||
| pipeline_elu_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_elu_pack8->create(LayerShaderType::elu_pack8, opt, specializations); | |||
| } | |||
| pipeline_elu = new Pipeline(vkdev); | |||
| pipeline_elu->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_elu->create(LayerShaderType::elu, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -107,34 +57,24 @@ int ELU_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_elu; | |||
| pipeline_elu = 0; | |||
| delete pipeline_elu_pack4; | |||
| pipeline_elu_pack4 = 0; | |||
| delete pipeline_elu_pack8; | |||
| pipeline_elu_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int ELU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h * bottom_top_blob.d; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_elu_pack8 | |||
| : elempack == 4 ? pipeline_elu_pack4 | |||
| : pipeline_elu; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_elu, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_elu; | |||
| Pipeline* pipeline_elu_pack4; | |||
| Pipeline* pipeline_elu_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -12,8 +12,6 @@ Erf_vulkan::Erf_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_erf = 0; | |||
| pipeline_erf_pack4 = 0; | |||
| pipeline_erf_pack8 = 0; | |||
| } | |||
| int Erf_vulkan::create_pipeline(const Option& opt) | |||
| @@ -40,56 +38,14 @@ int Erf_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(0 + 5); | |||
| specializations[0 + 0].i = shape_packed.dims; | |||
| specializations[0 + 1].i = shape_packed.w; | |||
| specializations[0 + 2].i = shape_packed.h; | |||
| specializations[0 + 3].i = shape_packed.c; | |||
| specializations[0 + 4].i = shape_packed.cstep; | |||
| std::vector<vk_specialization_type> specializations(1); | |||
| specializations[0].u32 = shape_packed.total() * elempack / 4; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_erf = new Pipeline(vkdev); | |||
| pipeline_erf->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_erf->create(LayerShaderType::erf, opt, specializations); | |||
| } | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_erf_pack4 = new Pipeline(vkdev); | |||
| pipeline_erf_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_erf_pack4->create(LayerShaderType::erf_pack4, opt, specializations); | |||
| } | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_erf_pack8 = new Pipeline(vkdev); | |||
| pipeline_erf_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_erf_pack8->create(LayerShaderType::erf_pack8, opt, specializations); | |||
| } | |||
| pipeline_erf = new Pipeline(vkdev); | |||
| pipeline_erf->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_erf->create(LayerShaderType::erf, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -99,34 +55,24 @@ int Erf_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_erf; | |||
| pipeline_erf = 0; | |||
| delete pipeline_erf_pack4; | |||
| pipeline_erf_pack4 = 0; | |||
| delete pipeline_erf_pack8; | |||
| pipeline_erf_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int Erf_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_erf_pack8 | |||
| : elempack == 4 ? pipeline_erf_pack4 | |||
| : pipeline_erf; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_erf, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_erf; | |||
| Pipeline* pipeline_erf_pack4; | |||
| Pipeline* pipeline_erf_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -12,8 +12,6 @@ GELU_vulkan::GELU_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_gelu = 0; | |||
| pipeline_gelu_pack4 = 0; | |||
| pipeline_gelu_pack8 = 0; | |||
| } | |||
| int GELU_vulkan::create_pipeline(const Option& opt) | |||
| @@ -41,62 +39,14 @@ int GELU_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(0 + 5); | |||
| specializations[0 + 0].i = shape_packed.dims; | |||
| specializations[0 + 1].i = shape_packed.w; | |||
| specializations[0 + 2].i = shape_packed.h * shape_packed.d; | |||
| specializations[0 + 3].i = shape_packed.c; | |||
| specializations[0 + 4].i = shape_packed.cstep; | |||
| std::vector<vk_specialization_type> specializations(1); | |||
| specializations[0].u32 = shape_packed.total() * elempack / 4; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| if (shape_packed.dims == 4) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_gelu = new Pipeline(vkdev); | |||
| pipeline_gelu->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_gelu->create(LayerShaderType::gelu, opt, specializations); | |||
| } | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_gelu_pack4 = new Pipeline(vkdev); | |||
| pipeline_gelu_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_gelu_pack4->create(LayerShaderType::gelu_pack4, opt, specializations); | |||
| } | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_gelu_pack8 = new Pipeline(vkdev); | |||
| pipeline_gelu_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_gelu_pack8->create(LayerShaderType::gelu_pack8, opt, specializations); | |||
| } | |||
| pipeline_gelu = new Pipeline(vkdev); | |||
| pipeline_gelu->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_gelu->create(LayerShaderType::gelu, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -106,34 +56,24 @@ int GELU_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_gelu; | |||
| pipeline_gelu = 0; | |||
| delete pipeline_gelu_pack4; | |||
| pipeline_gelu_pack4 = 0; | |||
| delete pipeline_gelu_pack8; | |||
| pipeline_gelu_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int GELU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h * bottom_top_blob.d; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_gelu_pack8 | |||
| : elempack == 4 ? pipeline_gelu_pack4 | |||
| : pipeline_gelu; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_gelu, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_gelu; | |||
| Pipeline* pipeline_gelu_pack4; | |||
| Pipeline* pipeline_gelu_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -12,8 +12,6 @@ HardSigmoid_vulkan::HardSigmoid_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_hardsigmoid = 0; | |||
| pipeline_hardsigmoid_pack4 = 0; | |||
| pipeline_hardsigmoid_pack8 = 0; | |||
| } | |||
| int HardSigmoid_vulkan::create_pipeline(const Option& opt) | |||
| @@ -40,58 +38,16 @@ int HardSigmoid_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(2 + 5); | |||
| std::vector<vk_specialization_type> specializations(2 + 1); | |||
| specializations[0].f = alpha; | |||
| specializations[1].f = beta; | |||
| specializations[2 + 0].i = shape_packed.dims; | |||
| specializations[2 + 1].i = shape_packed.w; | |||
| specializations[2 + 2].i = shape_packed.h; | |||
| specializations[2 + 3].i = shape_packed.c; | |||
| specializations[2 + 4].i = shape_packed.cstep; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_hardsigmoid = new Pipeline(vkdev); | |||
| pipeline_hardsigmoid->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_hardsigmoid->create(LayerShaderType::hardsigmoid, opt, specializations); | |||
| } | |||
| specializations[2 + 0].u32 = shape_packed.total() * elempack / 4; | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_hardsigmoid_pack4 = new Pipeline(vkdev); | |||
| pipeline_hardsigmoid_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_hardsigmoid_pack4->create(LayerShaderType::hardsigmoid_pack4, opt, specializations); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_hardsigmoid_pack8 = new Pipeline(vkdev); | |||
| pipeline_hardsigmoid_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_hardsigmoid_pack8->create(LayerShaderType::hardsigmoid_pack8, opt, specializations); | |||
| } | |||
| pipeline_hardsigmoid = new Pipeline(vkdev); | |||
| pipeline_hardsigmoid->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_hardsigmoid->create(LayerShaderType::hardsigmoid, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -101,34 +57,24 @@ int HardSigmoid_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_hardsigmoid; | |||
| pipeline_hardsigmoid = 0; | |||
| delete pipeline_hardsigmoid_pack4; | |||
| pipeline_hardsigmoid_pack4 = 0; | |||
| delete pipeline_hardsigmoid_pack8; | |||
| pipeline_hardsigmoid_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int HardSigmoid_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_hardsigmoid_pack8 | |||
| : elempack == 4 ? pipeline_hardsigmoid_pack4 | |||
| : pipeline_hardsigmoid; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_hardsigmoid, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_hardsigmoid; | |||
| Pipeline* pipeline_hardsigmoid_pack4; | |||
| Pipeline* pipeline_hardsigmoid_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -12,8 +12,6 @@ HardSwish_vulkan::HardSwish_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_hardswish = 0; | |||
| pipeline_hardswish_pack4 = 0; | |||
| pipeline_hardswish_pack8 = 0; | |||
| } | |||
| int HardSwish_vulkan::create_pipeline(const Option& opt) | |||
| @@ -40,58 +38,16 @@ int HardSwish_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(2 + 5); | |||
| std::vector<vk_specialization_type> specializations(2 + 1); | |||
| specializations[0].f = alpha; | |||
| specializations[1].f = beta; | |||
| specializations[2 + 0].i = shape_packed.dims; | |||
| specializations[2 + 1].i = shape_packed.w; | |||
| specializations[2 + 2].i = shape_packed.h; | |||
| specializations[2 + 3].i = shape_packed.c; | |||
| specializations[2 + 4].i = shape_packed.cstep; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_hardswish = new Pipeline(vkdev); | |||
| pipeline_hardswish->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_hardswish->create(LayerShaderType::hardswish, opt, specializations); | |||
| } | |||
| specializations[2 + 0].u32 = shape_packed.total() * elempack / 4; | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_hardswish_pack4 = new Pipeline(vkdev); | |||
| pipeline_hardswish_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_hardswish_pack4->create(LayerShaderType::hardswish_pack4, opt, specializations); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_hardswish_pack8 = new Pipeline(vkdev); | |||
| pipeline_hardswish_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_hardswish_pack8->create(LayerShaderType::hardswish_pack8, opt, specializations); | |||
| } | |||
| pipeline_hardswish = new Pipeline(vkdev); | |||
| pipeline_hardswish->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_hardswish->create(LayerShaderType::hardswish, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -101,34 +57,24 @@ int HardSwish_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_hardswish; | |||
| pipeline_hardswish = 0; | |||
| delete pipeline_hardswish_pack4; | |||
| pipeline_hardswish_pack4 = 0; | |||
| delete pipeline_hardswish_pack8; | |||
| pipeline_hardswish_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int HardSwish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_hardswish_pack8 | |||
| : elempack == 4 ? pipeline_hardswish_pack4 | |||
| : pipeline_hardswish; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_hardswish, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_hardswish; | |||
| Pipeline* pipeline_hardswish_pack4; | |||
| Pipeline* pipeline_hardswish_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -12,8 +12,6 @@ Mish_vulkan::Mish_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_mish = 0; | |||
| pipeline_mish_pack4 = 0; | |||
| pipeline_mish_pack8 = 0; | |||
| } | |||
| int Mish_vulkan::create_pipeline(const Option& opt) | |||
| @@ -41,62 +39,14 @@ int Mish_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(0 + 5); | |||
| specializations[0 + 0].i = shape_packed.dims; | |||
| specializations[0 + 1].i = shape_packed.w; | |||
| specializations[0 + 2].i = shape_packed.h * shape_packed.d; | |||
| specializations[0 + 3].i = shape_packed.c; | |||
| specializations[0 + 4].i = shape_packed.cstep; | |||
| std::vector<vk_specialization_type> specializations(1); | |||
| specializations[0].u32 = shape_packed.total() * elempack / 4; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| if (shape_packed.dims == 4) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_mish = new Pipeline(vkdev); | |||
| pipeline_mish->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_mish->create(LayerShaderType::mish, opt, specializations); | |||
| } | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_mish_pack4 = new Pipeline(vkdev); | |||
| pipeline_mish_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_mish_pack4->create(LayerShaderType::mish_pack4, opt, specializations); | |||
| } | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_mish_pack8 = new Pipeline(vkdev); | |||
| pipeline_mish_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_mish_pack8->create(LayerShaderType::mish_pack8, opt, specializations); | |||
| } | |||
| pipeline_mish = new Pipeline(vkdev); | |||
| pipeline_mish->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_mish->create(LayerShaderType::mish, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -106,34 +56,24 @@ int Mish_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_mish; | |||
| pipeline_mish = 0; | |||
| delete pipeline_mish_pack4; | |||
| pipeline_mish_pack4 = 0; | |||
| delete pipeline_mish_pack8; | |||
| pipeline_mish_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int Mish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h * bottom_top_blob.d; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_mish_pack8 | |||
| : elempack == 4 ? pipeline_mish_pack4 | |||
| : pipeline_mish; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_mish, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_mish; | |||
| Pipeline* pipeline_mish_pack4; | |||
| Pipeline* pipeline_mish_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -12,8 +12,6 @@ ReLU_vulkan::ReLU_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_relu = 0; | |||
| pipeline_relu_pack4 = 0; | |||
| pipeline_relu_pack8 = 0; | |||
| } | |||
| int ReLU_vulkan::create_pipeline(const Option& opt) | |||
| @@ -41,63 +39,15 @@ int ReLU_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(1 + 5); | |||
| std::vector<vk_specialization_type> specializations(1 + 1); | |||
| specializations[0].f = slope; | |||
| specializations[1 + 0].i = shape_packed.dims; | |||
| specializations[1 + 1].i = shape_packed.w; | |||
| specializations[1 + 2].i = shape_packed.h * shape_packed.d; | |||
| specializations[1 + 3].i = shape_packed.c; | |||
| specializations[1 + 4].i = shape_packed.cstep; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| if (shape_packed.dims == 4) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_relu = new Pipeline(vkdev); | |||
| pipeline_relu->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_relu->create(LayerShaderType::relu, opt, specializations); | |||
| } | |||
| specializations[1 + 0].u32 = shape_packed.total() * elempack / 4; | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_relu_pack4 = new Pipeline(vkdev); | |||
| pipeline_relu_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_relu_pack4->create(LayerShaderType::relu_pack4, opt, specializations); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_relu_pack8 = new Pipeline(vkdev); | |||
| pipeline_relu_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_relu_pack8->create(LayerShaderType::relu_pack8, opt, specializations); | |||
| } | |||
| pipeline_relu = new Pipeline(vkdev); | |||
| pipeline_relu->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_relu->create(LayerShaderType::relu, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -107,34 +57,24 @@ int ReLU_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_relu; | |||
| pipeline_relu = 0; | |||
| delete pipeline_relu_pack4; | |||
| pipeline_relu_pack4 = 0; | |||
| delete pipeline_relu_pack8; | |||
| pipeline_relu_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int ReLU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h * bottom_top_blob.d; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_relu_pack8 | |||
| : elempack == 4 ? pipeline_relu_pack4 | |||
| : pipeline_relu; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_relu, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_relu; | |||
| Pipeline* pipeline_relu_pack4; | |||
| Pipeline* pipeline_relu_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -6,37 +6,25 @@ | |||
| layout (constant_id = 0) const float alpha = 0; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| const uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| v = max(v, afpvec4(0.0f)) + min(alpha * (exp(v / afpvec4(alpha)) - afpvec4(1.0f)), afpvec4(0.0f)); | |||
| v = max(v, afp(0.0f)) + min(alpha * (exp(v / alpha) - 1.0f), afp(0.0f)); | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,42 +0,0 @@ | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float alpha = 0; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| v = max(v, afp(0.0f)) + min(alpha * (exp(v / alpha) - 1.0f), afp(0.0f)); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,43 +0,0 @@ | |||
| // Copyright 2020 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float alpha = 0; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); | |||
| v[0] = max(v[0], afp(0.0f)) + min(alpha * (exp(v[0] / alpha) - 1.0f), afp(0.0f)); | |||
| v[1] = max(v[1], afp(0.0f)) + min(alpha*(exp(v[1]/alpha)-1), afp(0.0f)); | |||
| buffer_st8(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -7,37 +7,25 @@ layout (constant_id = 0) const float const_min = 0; | |||
| layout (constant_id = 1) const float const_max = 0; | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| const uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| v = clamp(v, afpvec4(const_min), afpvec4(const_max)); | |||
| v = clamp(v, afp(const_min), afp(const_max)); | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,43 +0,0 @@ | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float const_min = 0; | |||
| layout (constant_id = 1) const float const_max = 0; | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| v = clamp(v, afp(const_min), afp(const_max)); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,44 +0,0 @@ | |||
| // Copyright 2020 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float const_min = 0; | |||
| layout (constant_id = 1) const float const_max = 0; | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); | |||
| v[0] = clamp(v[0], afp(const_min), afp(const_max)); | |||
| v[1] = clamp(v[1], afp(const_min), afp(const_max)); | |||
| buffer_st8(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,4 +1,4 @@ | |||
| // Copyright 2018 Tencent | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| @@ -6,37 +6,26 @@ | |||
| layout (constant_id = 0) const float scale = 1; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| v *= afp(scale); | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,42 +0,0 @@ | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float scale = 1; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| v *= afp(scale); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,43 +0,0 @@ | |||
| // Copyright 2020 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float scale = 1; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); | |||
| v[0] = v[0] * afp(scale); | |||
| v[1] = v[1] * afp(scale); | |||
| buffer_st8(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -6,37 +6,25 @@ | |||
| layout (constant_id = 0) const float alpha = 0; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| const uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| v = mix(afpvec4(alpha) * afpvec4(exp(v) - afpvec4(1.0f)), v, greaterThan(v, afpvec4(0.0f))); | |||
| v = v > afp(0.0f) ? v : afp(alpha * (exp(v) - 1.0f)); | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,42 +0,0 @@ | |||
| // Copyright 2022 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float alpha = 0; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| v = mix(afpvec4(alpha) * afpvec4(exp(v) - afpvec4(1.0f)), v, greaterThan(v, afpvec4(0.0f))); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,43 +0,0 @@ | |||
| // Copyright 2022 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float alpha = 0; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| afpvec8 packed = buffer_ld8(bottom_top_blob_data, offset); | |||
| // The two vec4 halves are processed independently with the same formula. | |||
| const afpvec4 lo = packed[0]; | |||
| const afpvec4 hi = packed[1]; | |||
| // Lanes above zero keep their value; the others become alpha * (exp(x) - 1). | |||
| packed[0] = mix(afpvec4(alpha) * afpvec4(exp(lo) - afpvec4(1.0f)), lo, greaterThan(lo, afpvec4(0.0f))); | |||
| packed[1] = mix(afpvec4(alpha) * afpvec4(exp(hi) - afpvec4(1.0f)), hi, greaterThan(hi, afpvec4(0.0f))); | |||
| buffer_st8(bottom_top_blob_data, offset, packed); | |||
| } | |||
| @@ -3,53 +3,41 @@ | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| // Polynomial erf approximation: returns sign(x) * (1 - poly(t) * t * exp(-abs(x)^2)), | |||
| // where t = 1 / (1 + p * abs(x)) and poly is evaluated in nested (Horner) form over a1..a5. | |||
| // NOTE(review): two signatures and two copies of every local (float and afpvec4) appear here; | |||
| // this reads as a scalar and a 4-wide version interleaved by a patch hunk - confirm which one is live. | |||
| float erf(float x) | |||
| afpvec4 erf(afpvec4 x) | |||
| { | |||
| float a1 = 0.254829592f; | |||
| float a2 = -0.284496736f; | |||
| float a3 = 1.421413741f; | |||
| float a4 = -1.453152027f; | |||
| float a5 = 1.061405429f; | |||
| float p = 0.3275911f; | |||
| float s = sign(x); | |||
| float x_abs = abs(x); | |||
| float t = 1.0f/(1.0f + p*x_abs); | |||
| float y = 1.0f - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x_abs*x_abs); | |||
| afpvec4 a1 = afpvec4(0.254829592f); | |||
| afpvec4 a2 = afpvec4(-0.284496736f); | |||
| afpvec4 a3 = afpvec4(1.421413741f); | |||
| afpvec4 a4 = afpvec4(-1.453152027f); | |||
| afpvec4 a5 = afpvec4(1.061405429f); | |||
| // Local p hides the push-constant instance p inside this function. | |||
| afpvec4 p = afpvec4(0.3275911f); | |||
| afpvec4 s = sign(x); | |||
| afpvec4 x_abs = abs(x); | |||
| afpvec4 t = 1.0f / (1.0f + p * x_abs); | |||
| afpvec4 y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs); | |||
| // The approximation is computed on abs(x); the sign is restored here. | |||
| return s * y; | |||
| } | |||
| // Applies erf() in place to one element of bottom_top_blob_data per invocation. | |||
| // NOTE(review): two addressing schemes are visible - gx/gy/gz bounded by w/h/c with a cstep-based | |||
| // offset, and a flat gi bounded by n - together with paired ld1/ld4 and st1/st4 rows. | |||
| // This reads as removed and added patch lines interleaved without markers; confirm which set is live. | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| const uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| v = erf(v); | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,55 +0,0 @@ | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| // Component-wise erf approximation: sign(x) * (1 - poly(t) * t * exp(-abs(x)^2)), | |||
| // with t = 1 / (1 + k * abs(x)) and poly evaluated step by step in Horner form. | |||
| afpvec4 erf(afpvec4 x) | |||
| { | |||
| const afpvec4 c1 = afpvec4(0.254829592f); | |||
| const afpvec4 c2 = afpvec4(-0.284496736f); | |||
| const afpvec4 c3 = afpvec4(1.421413741f); | |||
| const afpvec4 c4 = afpvec4(-1.453152027f); | |||
| const afpvec4 c5 = afpvec4(1.061405429f); | |||
| const afpvec4 k = afpvec4(0.3275911f); | |||
| const afpvec4 magnitude = abs(x); | |||
| const afpvec4 t = 1.0f / (1.0f + k * magnitude); | |||
| // Same nesting as a single Horner expression, one step per row. | |||
| afpvec4 poly = c5 * t + c4; | |||
| poly = poly * t + c3; | |||
| poly = poly * t + c2; | |||
| poly = poly * t + c1; | |||
| const afpvec4 approx = 1.0f - poly * t * exp(-magnitude * magnitude); | |||
| // The approximation works on abs(x); multiply the sign back in. | |||
| return sign(x) * approx; | |||
| } | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| // Load, apply erf() to all four lanes, and store back to the same slot. | |||
| const afpvec4 src = buffer_ld4(bottom_top_blob_data, offset); | |||
| const afpvec4 dst = erf(src); | |||
| buffer_st4(bottom_top_blob_data, offset, dst); | |||
| } | |||
| @@ -1,56 +0,0 @@ | |||
| // Copyright 2020 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| // Component-wise erf approximation: sign(x) * (1 - poly(t) * t * exp(-abs(x)^2)), | |||
| // with t = 1 / (1 + k * abs(x)) and poly evaluated step by step in Horner form. | |||
| afpvec4 erf(afpvec4 x) | |||
| { | |||
| const afpvec4 c1 = afpvec4(0.254829592f); | |||
| const afpvec4 c2 = afpvec4(-0.284496736f); | |||
| const afpvec4 c3 = afpvec4(1.421413741f); | |||
| const afpvec4 c4 = afpvec4(-1.453152027f); | |||
| const afpvec4 c5 = afpvec4(1.061405429f); | |||
| const afpvec4 k = afpvec4(0.3275911f); | |||
| const afpvec4 magnitude = abs(x); | |||
| const afpvec4 t = 1.0f / (1.0f + k * magnitude); | |||
| // Same nesting as a single Horner expression, one step per row. | |||
| afpvec4 poly = c5 * t + c4; | |||
| poly = poly * t + c3; | |||
| poly = poly * t + c2; | |||
| poly = poly * t + c1; | |||
| const afpvec4 approx = 1.0f - poly * t * exp(-magnitude * magnitude); | |||
| // The approximation works on abs(x); multiply the sign back in. | |||
| return sign(x) * approx; | |||
| } | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| afpvec8 packed = buffer_ld8(bottom_top_blob_data, offset); | |||
| // erf() takes a vec4, so each half of the 8-wide value is handled separately. | |||
| const afpvec4 lo = packed[0]; | |||
| const afpvec4 hi = packed[1]; | |||
| packed[0] = erf(lo); | |||
| packed[1] = erf(hi); | |||
| buffer_st8(bottom_top_blob_data, offset, packed); | |||
| } | |||
| @@ -4,43 +4,32 @@ | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| // Rewrites one element of bottom_top_blob_data in place using the tanh-based formula quoted below. | |||
| // The NCNN_moltenvk branch routes the tanh argument through float/vec4 and converts the result back. | |||
| // NOTE(review): two addressing schemes (gx/gy/gz with cstep vs. flat gi bounded by n) and paired | |||
| // afp/afpvec4 rows are both present; this reads as removed and added patch lines interleaved | |||
| // without markers - confirm which set is live. | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| // y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715x^3))) | |||
| #if NCNN_moltenvk | |||
| v = afp(0.5f) * v * (afp(1.0f) + afp(tanh(float(afp(0.79788452f) * (v + afp(0.044715f) * v * v * v))))); | |||
| v = afpvec4(0.5f) * v * (afpvec4(1.0f) + afpvec4(tanh(vec4(afpvec4(0.79788452f) * (v + afpvec4(0.044715f) * v * v * v))))); | |||
| #else | |||
| v = afp(0.5f) * v * (afp(1.0f) + tanh(afp(0.79788452f) * (v + afp(0.044715f) * v * v * v))); | |||
| v = afpvec4(0.5f) * v * (afpvec4(1.0f) + tanh(afpvec4(0.79788452f) * (v + afpvec4(0.044715f) * v * v * v))); | |||
| #endif | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,46 +0,0 @@ | |||
| // Copyright 2023 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| const afpvec4 src = buffer_ld4(bottom_top_blob_data, offset); | |||
| // y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715x^3))) | |||
| const afpvec4 inner = afpvec4(0.79788452f) * (src + afpvec4(0.044715f) * src * src * src); | |||
| // Under NCNN_moltenvk the tanh argument goes through vec4 and the result is converted back. | |||
| #if NCNN_moltenvk | |||
| const afpvec4 squashed = afpvec4(tanh(vec4(inner))); | |||
| #else | |||
| const afpvec4 squashed = tanh(inner); | |||
| #endif | |||
| const afpvec4 dst = afpvec4(0.5f) * src * (afpvec4(1.0f) + squashed); | |||
| buffer_st4(bottom_top_blob_data, offset, dst); | |||
| } | |||
| @@ -1,48 +0,0 @@ | |||
| // Copyright 2023 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| afpvec8 packed = buffer_ld8(bottom_top_blob_data, offset); | |||
| const afpvec4 lo = packed[0]; | |||
| const afpvec4 hi = packed[1]; | |||
| // y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715x^3))), applied to each vec4 half | |||
| const afpvec4 inner_lo = afpvec4(0.79788452f) * (lo + afpvec4(0.044715f) * lo * lo * lo); | |||
| const afpvec4 inner_hi = afpvec4(0.79788452f) * (hi + afpvec4(0.044715f) * hi * hi * hi); | |||
| // Under NCNN_moltenvk the tanh argument goes through vec4 and the result is converted back. | |||
| #if NCNN_moltenvk | |||
| const afpvec4 squashed_lo = afpvec4(tanh(vec4(inner_lo))); | |||
| const afpvec4 squashed_hi = afpvec4(tanh(vec4(inner_hi))); | |||
| #else | |||
| const afpvec4 squashed_lo = tanh(inner_lo); | |||
| const afpvec4 squashed_hi = tanh(inner_hi); | |||
| #endif | |||
| packed[0] = afpvec4(0.5f) * lo * (afpvec4(1.0f) + squashed_lo); | |||
| packed[1] = afpvec4(0.5f) * hi * (afpvec4(1.0f) + squashed_hi); | |||
| buffer_st8(bottom_top_blob_data, offset, packed); | |||
| } | |||
| @@ -1,4 +1,4 @@ | |||
| // Copyright 2018 Tencent | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| @@ -7,37 +7,26 @@ layout (constant_id = 0) const float alpha = 0; | |||
| layout (constant_id = 1) const float beta = 0; | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| // Rewrites one element of bottom_top_blob_data in place as clamp(v * alpha + beta, 0, 1). | |||
| // NOTE(review): two addressing schemes (gx/gy/gz with cstep vs. flat gi bounded by n) and paired | |||
| // ld1/ld4 and st1/st4 rows are both present; this reads as removed and added patch lines | |||
| // interleaved without markers - confirm which set is live. | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| v = clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,43 +0,0 @@ | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float alpha = 0; | |||
| layout (constant_id = 1) const float beta = 0; | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| const afpvec4 src = buffer_ld4(bottom_top_blob_data, offset); | |||
| // Affine map by alpha/beta, then limit every lane to the range [0, 1]. | |||
| const afpvec4 scaled = src * afp(alpha) + afp(beta); | |||
| const afpvec4 dst = clamp(scaled, afp(0.f), afp(1.f)); | |||
| buffer_st4(bottom_top_blob_data, offset, dst); | |||
| } | |||
| @@ -1,44 +0,0 @@ | |||
| // Copyright 2020 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float alpha = 0; | |||
| layout (constant_id = 1) const float beta = 0; | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| afpvec8 packed = buffer_ld8(bottom_top_blob_data, offset); | |||
| const afpvec4 lo = packed[0]; | |||
| const afpvec4 hi = packed[1]; | |||
| // Affine map by alpha/beta, then limit every lane of each half to [0, 1]. | |||
| packed[0] = clamp(lo * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| packed[1] = clamp(hi * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| buffer_st8(bottom_top_blob_data, offset, packed); | |||
| } | |||
| @@ -1,4 +1,4 @@ | |||
| // Copyright 2018 Tencent | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| @@ -7,37 +7,25 @@ layout (constant_id = 0) const float alpha = 0; | |||
| layout (constant_id = 1) const float beta = 0; | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| // Rewrites one element of bottom_top_blob_data in place as v * clamp(v * alpha + beta, 0, 1). | |||
| // NOTE(review): two addressing schemes (gx/gy/gz with cstep vs. flat gi bounded by n) and paired | |||
| // ld1/ld4 and st1/st4 rows are both present; this reads as removed and added patch lines | |||
| // interleaved without markers - confirm which set is live. | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| v = v * clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,43 +0,0 @@ | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float alpha = 0; | |||
| layout (constant_id = 1) const float beta = 0; | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| const afpvec4 src = buffer_ld4(bottom_top_blob_data, offset); | |||
| // The input is multiplied by its own alpha/beta affine map limited to [0, 1]. | |||
| const afpvec4 gate = clamp(src * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| const afpvec4 dst = src * gate; | |||
| buffer_st4(bottom_top_blob_data, offset, dst); | |||
| } | |||
| @@ -1,44 +0,0 @@ | |||
| // Copyright 2020 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float alpha = 0; | |||
| layout (constant_id = 1) const float beta = 0; | |||
| #define shape_constant_id_offset 2 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| afpvec8 packed = buffer_ld8(bottom_top_blob_data, offset); | |||
| const afpvec4 lo = packed[0]; | |||
| const afpvec4 hi = packed[1]; | |||
| // Each half is multiplied by its own alpha/beta affine map limited to [0, 1]. | |||
| packed[0] = lo * clamp(lo * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| packed[1] = hi * clamp(hi * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| buffer_st8(bottom_top_blob_data, offset, packed); | |||
| } | |||
| @@ -1,44 +1,32 @@ | |||
| // Copyright 2020 Tencent | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| // Rewrites one element of bottom_top_blob_data in place as v * tanh(log(exp(v) + 1)). | |||
| // The NCNN_moltenvk branch routes the tanh argument through float/vec4 and converts the result back. | |||
| // NOTE(review): two addressing schemes (gx/gy/gz with cstep vs. flat gi bounded by n) and paired | |||
| // afp/afpvec4 rows are both present; this reads as removed and added patch lines interleaved | |||
| // without markers - confirm which set is live. | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| #if NCNN_moltenvk | |||
| v = v * afp(tanh(float(log(exp(v) + afp(1.f))))); | |||
| v = v * afpvec4(tanh(vec4(log(exp(v) + afpvec4(1.f))))); | |||
| #else | |||
| v = v * tanh(log(exp(v) + afp(1.f))); | |||
| v = v * tanh(log(exp(v) + afpvec4(1.f))); | |||
| #endif | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,44 +0,0 @@ | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| const afpvec4 src = buffer_ld4(bottom_top_blob_data, offset); | |||
| // Result is src * tanh(log(exp(src) + 1)). | |||
| const afpvec4 log_term = log(exp(src) + afpvec4(1.f)); | |||
| // Under NCNN_moltenvk the tanh argument goes through vec4 and the result is converted back. | |||
| #if NCNN_moltenvk | |||
| const afpvec4 gate = afpvec4(tanh(vec4(log_term))); | |||
| #else | |||
| const afpvec4 gate = tanh(log_term); | |||
| #endif | |||
| const afpvec4 dst = src * gate; | |||
| buffer_st4(bottom_top_blob_data, offset, dst); | |||
| } | |||
| @@ -1,46 +0,0 @@ | |||
| // Copyright 2020 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| afpvec8 packed = buffer_ld8(bottom_top_blob_data, offset); | |||
| const afpvec4 lo = packed[0]; | |||
| const afpvec4 hi = packed[1]; | |||
| // Each half becomes x * tanh(log(exp(x) + 1)). | |||
| const afpvec4 log_lo = log(exp(lo) + afpvec4(1.f)); | |||
| const afpvec4 log_hi = log(exp(hi) + afpvec4(1.f)); | |||
| // Under NCNN_moltenvk the tanh argument goes through vec4 and the result is converted back. | |||
| #if NCNN_moltenvk | |||
| packed[0] = lo * afpvec4(tanh(vec4(log_lo))); | |||
| packed[1] = hi * afpvec4(tanh(vec4(log_hi))); | |||
| #else | |||
| packed[0] = lo * tanh(log_lo); | |||
| packed[1] = hi * tanh(log_hi); | |||
| #endif | |||
| buffer_st8(bottom_top_blob_data, offset, packed); | |||
| } | |||
| @@ -1,4 +1,4 @@ | |||
| // Copyright 2018 Tencent | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| @@ -6,40 +6,29 @@ | |||
| layout (constant_id = 0) const float slope = 0; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = shape_constant_id_offset + 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| // Rewrites one element of bottom_top_blob_data in place: max(v, 0) when slope is zero, | |||
| // otherwise negative values are multiplied by slope and the rest are left unchanged. | |||
| // NOTE(review): two addressing schemes (gx/gy/gz with cstep vs. flat gi bounded by n), paired | |||
| // ld1/ld4 and st1/st4 rows, and both a ternary and a mix() form of the else branch are present; | |||
| // this reads as removed and added patch lines interleaved without markers - confirm which set is live. | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| if (slope == 0) | |||
| v = max(v, afp(0.f)); | |||
| else | |||
| v = v < afp(0.f) ? v * afp(slope) : v; | |||
| v = mix(v, v * afp(slope), lessThan(v, afpvec4(0.f))); | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,45 +0,0 @@ | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float slope = 0; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| afpvec4 value = buffer_ld4(bottom_top_blob_data, offset); | |||
| if (slope != 0) | |||
| { | |||
| // Non-zero slope: negative lanes are scaled by slope, the rest pass through. | |||
| value = mix(value, value * afp(slope), lessThan(value, afpvec4(0.f))); | |||
| } | |||
| else | |||
| { | |||
| // Zero slope: negative lanes are replaced by zero. | |||
| value = max(value, afp(0.f)); | |||
| } | |||
| buffer_st4(bottom_top_blob_data, offset, value); | |||
| } | |||
| @@ -1,51 +0,0 @@ | |||
| // Copyright 2020 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| layout (constant_id = 0) const float slope = 0; | |||
| #define shape_constant_id_offset 1 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| // One invocation per buffer element, addressed by the 3-D global id. | |||
| const ivec3 pos = ivec3(gl_GlobalInvocationID); | |||
| // Nothing to do for ids beyond the w/h/c extent. | |||
| if (!(pos.x < psc(w) && pos.y < psc(h) && pos.z < psc(c))) | |||
| return; | |||
| const int offset = pos.z * psc(cstep) + pos.y * psc(w) + pos.x; | |||
| afpvec8 packed = buffer_ld8(bottom_top_blob_data, offset); | |||
| const afpvec4 lo = packed[0]; | |||
| const afpvec4 hi = packed[1]; | |||
| if (slope != 0) | |||
| { | |||
| // Non-zero slope: negative lanes are scaled by slope, the rest pass through. | |||
| packed[0] = mix(lo, lo * afp(slope), lessThan(lo, afpvec4(0.f))); | |||
| packed[1] = mix(hi, hi * afp(slope), lessThan(hi, afpvec4(0.f))); | |||
| } | |||
| else | |||
| { | |||
| // Zero slope: negative lanes are replaced by zero. | |||
| packed[0] = max(lo, afp(0.f)); | |||
| packed[1] = max(hi, afp(0.f)); | |||
| } | |||
| buffer_st8(bottom_top_blob_data, offset, packed); | |||
| } | |||
| @@ -1,40 +1,27 @@ | |||
| // Copyright 2020 Tencent | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| v = v / (afpvec4(1.f) + exp(-v)); | |||
| v = v / (afp(1.f) + exp(-v)); | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,40 +0,0 @@ | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| v = v / (afpvec4(1.f) + exp(-v)); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,41 +0,0 @@ | |||
| // Copyright 2020 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); | |||
| v[0] = v[0] / (afpvec4(1.f) + exp(-v[0])); | |||
| v[1] = v[1] / (afpvec4(1.f) + exp(-v[1])); | |||
| buffer_st8(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -3,42 +3,30 @@ | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (constant_id = 0) const uint n = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| uint n; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| uint gi = gl_GlobalInvocationID.x; | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| if (gi >= psc(n)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afp v = buffer_ld1(bottom_top_blob_data, gi); | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| #if NCNN_moltenvk | |||
| v = afp(tanh(float(v))); | |||
| v = afpvec4(tanh(vec4(v))); | |||
| #else | |||
| v = tanh(v); | |||
| #endif | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,44 +0,0 @@ | |||
| // Copyright 2019 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec4 v = buffer_ld4(bottom_top_blob_data, gi); | |||
| #if NCNN_moltenvk | |||
| v = afpvec4(tanh(vec4(v))); | |||
| #else | |||
| v = tanh(v); | |||
| #endif | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -1,46 +0,0 @@ | |||
| // Copyright 2020 Tencent | |||
| // SPDX-License-Identifier: BSD-3-Clause | |||
| #version 450 | |||
| #define shape_constant_id_offset 0 | |||
| layout (constant_id = shape_constant_id_offset + 0) const int dims = 0; | |||
| layout (constant_id = shape_constant_id_offset + 1) const int w = 0; | |||
| layout (constant_id = shape_constant_id_offset + 2) const int h = 0; | |||
| layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int dims; | |||
| int w; | |||
| int h; | |||
| int c; | |||
| int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| afpvec8 v = buffer_ld8(bottom_top_blob_data, gi); | |||
| #if NCNN_moltenvk | |||
| v[0] = afpvec4(tanh(vec4(v[0]))); | |||
| v[1] = afpvec4(tanh(vec4(v[1]))); | |||
| #else | |||
| v[0] = tanh(v[0]); | |||
| v[1] = tanh(v[1]); | |||
| #endif | |||
| buffer_st8(bottom_top_blob_data, gi, v); | |||
| } | |||
| @@ -12,8 +12,6 @@ Swish_vulkan::Swish_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_swish = 0; | |||
| pipeline_swish_pack4 = 0; | |||
| pipeline_swish_pack8 = 0; | |||
| } | |||
| int Swish_vulkan::create_pipeline(const Option& opt) | |||
| @@ -41,62 +39,14 @@ int Swish_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(0 + 5); | |||
| specializations[0 + 0].i = shape_packed.dims; | |||
| specializations[0 + 1].i = shape_packed.w; | |||
| specializations[0 + 2].i = shape_packed.h * shape_packed.d; | |||
| specializations[0 + 3].i = shape_packed.c; | |||
| specializations[0 + 4].i = shape_packed.cstep; | |||
| std::vector<vk_specialization_type> specializations(1); | |||
| specializations[0 + 0].u32 = shape_packed.total() * elempack / 4; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| if (shape_packed.dims == 4) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_swish = new Pipeline(vkdev); | |||
| pipeline_swish->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_swish->create(LayerShaderType::swish, opt, specializations); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_swish_pack4 = new Pipeline(vkdev); | |||
| pipeline_swish_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_swish_pack4->create(LayerShaderType::swish_pack4, opt, specializations); | |||
| } | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_swish_pack8 = new Pipeline(vkdev); | |||
| pipeline_swish_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_swish_pack8->create(LayerShaderType::swish_pack8, opt, specializations); | |||
| } | |||
| pipeline_swish = new Pipeline(vkdev); | |||
| pipeline_swish->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_swish->create(LayerShaderType::swish, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -106,36 +56,25 @@ int Swish_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_swish; | |||
| pipeline_swish = 0; | |||
| delete pipeline_swish_pack4; | |||
| pipeline_swish_pack4 = 0; | |||
| delete pipeline_swish_pack8; | |||
| pipeline_swish_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int Swish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h * bottom_top_blob.d; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_swish_pack8 | |||
| : elempack == 4 ? pipeline_swish_pack4 | |||
| : pipeline_swish; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_swish, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| } // namespace ncnn | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_swish; | |||
| Pipeline* pipeline_swish_pack4; | |||
| Pipeline* pipeline_swish_pack8; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -12,8 +12,6 @@ TanH_vulkan::TanH_vulkan() | |||
| support_vulkan = true; | |||
| pipeline_tanh = 0; | |||
| pipeline_tanh_pack4 = 0; | |||
| pipeline_tanh_pack8 = 0; | |||
| } | |||
| int TanH_vulkan::create_pipeline(const Option& opt) | |||
| @@ -41,62 +39,14 @@ int TanH_vulkan::create_pipeline(const Option& opt) | |||
| if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack); | |||
| std::vector<vk_specialization_type> specializations(0 + 5); | |||
| specializations[0 + 0].i = shape_packed.dims; | |||
| specializations[0 + 1].i = shape_packed.w; | |||
| specializations[0 + 2].i = shape_packed.h * shape_packed.d; | |||
| specializations[0 + 3].i = shape_packed.c; | |||
| specializations[0 + 4].i = shape_packed.cstep; | |||
| std::vector<vk_specialization_type> specializations(1); | |||
| specializations[0 + 0].u32 = shape_packed.total() * elempack / 4; | |||
| Mat local_size_xyz; | |||
| if (shape_packed.dims == 1) | |||
| { | |||
| local_size_xyz.w = std::min(64, shape_packed.w); | |||
| local_size_xyz.h = 1; | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 2) | |||
| { | |||
| local_size_xyz.w = std::min(8, shape_packed.w); | |||
| local_size_xyz.h = std::min(8, shape_packed.h); | |||
| local_size_xyz.c = 1; | |||
| } | |||
| if (shape_packed.dims == 3) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| if (shape_packed.dims == 4) | |||
| { | |||
| local_size_xyz.w = std::min(4, shape_packed.w); | |||
| local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d); | |||
| local_size_xyz.c = std::min(4, shape_packed.c); | |||
| } | |||
| const int local_size_x = vkdev->info.subgroup_size(); | |||
| // pack1 | |||
| if (shape.dims == 0 || elempack == 1) | |||
| { | |||
| pipeline_tanh = new Pipeline(vkdev); | |||
| pipeline_tanh->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_tanh->create(LayerShaderType::tanh, opt, specializations); | |||
| } | |||
| // pack4 | |||
| if (shape.dims == 0 || elempack == 4) | |||
| { | |||
| pipeline_tanh_pack4 = new Pipeline(vkdev); | |||
| pipeline_tanh_pack4->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_tanh_pack4->create(LayerShaderType::tanh_pack4, opt, specializations); | |||
| } | |||
| // pack8 | |||
| if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8) | |||
| { | |||
| pipeline_tanh_pack8 = new Pipeline(vkdev); | |||
| pipeline_tanh_pack8->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_tanh_pack8->create(LayerShaderType::tanh_pack8, opt, specializations); | |||
| } | |||
| pipeline_tanh = new Pipeline(vkdev); | |||
| pipeline_tanh->set_optimal_local_size_xyz(local_size_x, 1, 1); | |||
| pipeline_tanh->create(LayerShaderType::tanh, opt, specializations); | |||
| return 0; | |||
| } | |||
| @@ -106,34 +56,24 @@ int TanH_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| delete pipeline_tanh; | |||
| pipeline_tanh = 0; | |||
| delete pipeline_tanh_pack4; | |||
| pipeline_tanh_pack4 = 0; | |||
| delete pipeline_tanh_pack8; | |||
| pipeline_tanh_pack8 = 0; | |||
| return 0; | |||
| } | |||
| int TanH_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const | |||
| { | |||
| int elempack = bottom_top_blob.elempack; | |||
| const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4; | |||
| std::vector<VkMat> bindings(1); | |||
| bindings[0] = bottom_top_blob; | |||
| std::vector<vk_constant_type> constants(5); | |||
| constants[0].i = bottom_top_blob.dims; | |||
| constants[1].i = bottom_top_blob.w; | |||
| constants[2].i = bottom_top_blob.h * bottom_top_blob.d; | |||
| constants[3].i = bottom_top_blob.c; | |||
| constants[4].i = bottom_top_blob.cstep; | |||
| const Pipeline* pipeline = elempack == 8 ? pipeline_tanh_pack8 | |||
| : elempack == 4 ? pipeline_tanh_pack4 | |||
| : pipeline_tanh; | |||
| std::vector<vk_constant_type> constants(1); | |||
| constants[0].u32 = n; | |||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||
| VkMat dispatcher; | |||
| dispatcher.w = n; | |||
| dispatcher.h = 1; | |||
| dispatcher.c = 1; | |||
| cmd.record_pipeline(pipeline_tanh, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| @@ -21,8 +21,6 @@ public: | |||
| public: | |||
| Pipeline* pipeline_tanh; | |||
| Pipeline* pipeline_tanh_pack4; | |||
| Pipeline* pipeline_tanh_pack8; | |||
| }; | |||
| } // namespace ncnn | |||