| @@ -22,6 +22,12 @@ PReLU::PReLU() | |||
| { | |||
| one_blob_only = true; | |||
| support_inplace = true; | |||
| support_vulkan = true; | |||
| #if NCNN_VULKAN | |||
| pipeline_prelu = 0; | |||
| pipeline_prelu_pack4 = 0; | |||
| #endif // NCNN_VULKAN | |||
| } | |||
| int PReLU::load_param(const ParamDict& pd) | |||
| @@ -115,4 +121,84 @@ int PReLU::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| return 0; | |||
| } | |||
| #if NCNN_VULKAN | |||
| // Records the upload of the slope weights into slope_data_gpu on the given transfer command. | |||
| // Always returns 0. | |||
| int PReLU::upload_model(VkTransfer& cmd) | |||
| { | |||
|     if (num_slope != 1) | |||
|     { | |||
|         // num_slope other than 1: the host slope data is uploaded unchanged | |||
|         cmd.record_upload(slope_data, slope_data_gpu); | |||
|         return 0; | |||
|     } | |||
|     // single shared slope: replicate it into 4 elements so that the pack4 shader, | |||
|     // which declares the slope buffer as vec4[], can read it from element 0 | |||
|     Mat replicated_slope(4); | |||
|     replicated_slope.fill(slope_data[0]); | |||
|     cmd.record_upload(replicated_slope, slope_data_gpu); | |||
|     return 0; | |||
| } | |||
| // Creates the compute pipelines used by the Vulkan forward pass. | |||
| // - "prelu" (pack1) is created when num_slope is 1 or is not a multiple of 4. | |||
| // - "prelu_pack4" is created when num_slope is 1 or is a multiple of 4. | |||
| // num_slope is handed to both shaders as specialization constant 0. | |||
| // Always returns 0. | |||
| int PReLU::create_pipeline() | |||
| { | |||
|     const bool shared_slope = num_slope == 1; | |||
|     const bool multiple_of_4 = num_slope % 4 == 0; | |||
|     std::vector<vk_specialization_type> spec_constants(1); | |||
|     spec_constants[0].i = num_slope; | |||
|     // NOTE(review): the trailing 2 and 5 presumably are the binding count and the | |||
|     // push constant count; they match the sizes used in forward_inplace - confirm | |||
|     if (shared_slope || !multiple_of_4) | |||
|     { | |||
|         // pack1 | |||
|         pipeline_prelu = new Pipeline(vkdev); | |||
|         pipeline_prelu->set_optimal_local_size_xyz(8, 8, num_slope); | |||
|         pipeline_prelu->create("prelu", spec_constants, 2, 5); | |||
|     } | |||
|     if (shared_slope || multiple_of_4) | |||
|     { | |||
|         // pack4 | |||
|         pipeline_prelu_pack4 = new Pipeline(vkdev); | |||
|         pipeline_prelu_pack4->set_optimal_local_size_xyz(8, 8, num_slope / 4); | |||
|         pipeline_prelu_pack4->create("prelu_pack4", spec_constants, 2, 5); | |||
|     } | |||
|     return 0; | |||
| } | |||
| // Deletes both pipelines and resets the pointers to 0, so that a pipeline | |||
| // which was never created, or was already destroyed, is left as a null pointer. | |||
| // Always returns 0. | |||
| int PReLU::destroy_pipeline() | |||
| { | |||
|     if (pipeline_prelu) | |||
|     { | |||
|         delete pipeline_prelu; | |||
|         pipeline_prelu = 0; | |||
|     } | |||
|     if (pipeline_prelu_pack4) | |||
|     { | |||
|         delete pipeline_prelu_pack4; | |||
|         pipeline_prelu_pack4 = 0; | |||
|     } | |||
|     return 0; | |||
| } | |||
| // Records the in-place PReLU computation on bottom_top_blob into cmd. | |||
| // The pack4 pipeline is used when the blob packing is 4, the pack1 pipeline otherwise. | |||
| // opt is not used here. Always returns 0. | |||
| int PReLU::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const | |||
| { | |||
|     // fprintf(stderr, "PReLU::forward_inplace %p\n", bottom_top_blob.buffer()); | |||
|     const Pipeline* pipeline = pipeline_prelu; | |||
|     if (bottom_top_blob.packing == 4) | |||
|         pipeline = pipeline_prelu_pack4; | |||
|     // binding 0: data buffer updated in place, binding 1: slope buffer | |||
|     std::vector<VkMat> buffers(2); | |||
|     buffers[0] = bottom_top_blob; | |||
|     buffers[1] = slope_data_gpu; | |||
|     // same order as the push constant block in the shaders: dims, w, h, c, cstep | |||
|     std::vector<vk_constant_type> shape(5); | |||
|     shape[0].i = bottom_top_blob.dims; | |||
|     shape[1].i = bottom_top_blob.w; | |||
|     shape[2].i = bottom_top_blob.h; | |||
|     shape[3].i = bottom_top_blob.c; | |||
|     shape[4].i = bottom_top_blob.cstep; | |||
|     // record | |||
|     cmd.record_prepare_compute_barrier(bottom_top_blob); | |||
|     cmd.record_pipeline(pipeline, buffers, shape, bottom_top_blob); | |||
|     return 0; | |||
| } | |||
| #endif // NCNN_VULKAN | |||
| } // namespace ncnn | |||
| @@ -30,9 +30,24 @@ public: | |||
| virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; | |||
| #if NCNN_VULKAN | |||
| virtual int upload_model(VkTransfer& cmd); | |||
| virtual int create_pipeline(); | |||
| virtual int destroy_pipeline(); | |||
| virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; | |||
| #endif // NCNN_VULKAN | |||
| public: | |||
| int num_slope; | |||
| Mat slope_data; | |||
| #if NCNN_VULKAN | |||
| VkMat slope_data_gpu; | |||
| Pipeline* pipeline_prelu; | |||
| Pipeline* pipeline_prelu_pack4; | |||
| #endif // NCNN_VULKAN | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -0,0 +1,86 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| #version 450 | |||
| // PReLU on unpacked float data: every negative element is multiplied by its slope, in place. | |||
| // num_slope > 1 selects one slope per element (dims 1), per row (dims 2) or per channel (dims 3); | |||
| // otherwise slope_blob_data[0] is used for every element. | |||
| layout (constant_id = 0) const int num_slope = 0; | |||
| layout (local_size_x_id = 233) in; | |||
| layout (local_size_y_id = 234) in; | |||
| layout (local_size_z_id = 235) in; | |||
| layout (binding = 0) buffer bottom_top_blob { float bottom_top_blob_data[]; }; | |||
| layout (binding = 1) readonly buffer slope_blob { float slope_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
|     int dims; | |||
|     int w; | |||
|     int h; | |||
|     int c; | |||
|     int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
|     const ivec3 g = ivec3(gl_GlobalInvocationID); | |||
|     // invocations outside the blob extent have nothing to do | |||
|     if (g.x >= p.w || g.y >= p.h || g.z >= p.c) | |||
|         return; | |||
|     int offset;      // position of the element in bottom_top_blob_data | |||
|     int slope_index; // position of the slope when num_slope > 1 | |||
|     if (p.dims == 1) | |||
|     { | |||
|         offset = g.x; | |||
|         slope_index = g.x; | |||
|     } | |||
|     else if (p.dims == 2) | |||
|     { | |||
|         offset = g.y * p.w + g.x; | |||
|         slope_index = g.y; | |||
|     } | |||
|     else if (p.dims == 3) | |||
|     { | |||
|         offset = g.z * p.cstep + g.y * p.w + g.x; | |||
|         slope_index = g.z; | |||
|     } | |||
|     else | |||
|     { | |||
|         // any other dims value is left untouched | |||
|         return; | |||
|     } | |||
|     const float v = bottom_top_blob_data[offset]; | |||
|     const float slope = slope_blob_data[num_slope > 1 ? slope_index : 0]; | |||
|     bottom_top_blob_data[offset] = v < 0 ? v * slope : v; | |||
| } | |||
| @@ -0,0 +1,86 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| #version 450 | |||
| // PReLU on data read as vec4: every negative component is multiplied by the matching | |||
| // slope component, in place. | |||
| // num_slope > 1 selects one slope vec4 per element (dims 1), per row (dims 2) or per channel (dims 3); | |||
| // otherwise slope_blob_data[0] is used for every element. | |||
| layout (constant_id = 0) const int num_slope = 0; | |||
| layout (local_size_x_id = 233) in; | |||
| layout (local_size_y_id = 234) in; | |||
| layout (local_size_z_id = 235) in; | |||
| layout (binding = 0) buffer bottom_top_blob { vec4 bottom_top_blob_data[]; }; | |||
| layout (binding = 1) readonly buffer slope_blob { vec4 slope_blob_data[]; }; | |||
| layout (push_constant) uniform parameter | |||
| { | |||
|     int dims; | |||
|     int w; | |||
|     int h; | |||
|     int c; | |||
|     int cstep; | |||
| } p; | |||
| void main() | |||
| { | |||
|     const ivec3 g = ivec3(gl_GlobalInvocationID); | |||
|     // invocations outside the blob extent have nothing to do | |||
|     if (g.x >= p.w || g.y >= p.h || g.z >= p.c) | |||
|         return; | |||
|     int offset;      // position of the vec4 in bottom_top_blob_data | |||
|     int slope_index; // position of the slope vec4 when num_slope > 1 | |||
|     if (p.dims == 1) | |||
|     { | |||
|         offset = g.x; | |||
|         slope_index = g.x; | |||
|     } | |||
|     else if (p.dims == 2) | |||
|     { | |||
|         offset = g.y * p.w + g.x; | |||
|         slope_index = g.y; | |||
|     } | |||
|     else if (p.dims == 3) | |||
|     { | |||
|         offset = g.z * p.cstep + g.y * p.w + g.x; | |||
|         slope_index = g.z; | |||
|     } | |||
|     else | |||
|     { | |||
|         // any other dims value is left untouched | |||
|         return; | |||
|     } | |||
|     const vec4 v = bottom_top_blob_data[offset]; | |||
|     const vec4 slope = slope_blob_data[num_slope > 1 ? slope_index : 0]; | |||
|     // component-wise select: v * slope where v < 0, v elsewhere | |||
|     bottom_top_blob_data[offset] = mix(v, v * slope, lessThan(v, vec4(0.0))); | |||
| } | |||