| @@ -22,6 +22,12 @@ PReLU::PReLU() | |||||
| { | { | ||||
| one_blob_only = true; | one_blob_only = true; | ||||
| support_inplace = true; | support_inplace = true; | ||||
| support_vulkan = true; | |||||
| #if NCNN_VULKAN | |||||
| pipeline_prelu = 0; | |||||
| pipeline_prelu_pack4 = 0; | |||||
| #endif // NCNN_VULKAN | |||||
| } | } | ||||
| int PReLU::load_param(const ParamDict& pd) | int PReLU::load_param(const ParamDict& pd) | ||||
| @@ -115,4 +121,84 @@ int PReLU::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| #if NCNN_VULKAN | |||||
| int PReLU::upload_model(VkTransfer& cmd) | |||||
| { | |||||
| if (num_slope == 1) | |||||
| { | |||||
| // dup4 for pack4 | |||||
| Mat slope_data4(4); | |||||
| slope_data4.fill(slope_data[0]); | |||||
| cmd.record_upload(slope_data4, slope_data_gpu); | |||||
| } | |||||
| else | |||||
| { | |||||
| cmd.record_upload(slope_data, slope_data_gpu); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| int PReLU::create_pipeline() | |||||
| { | |||||
| std::vector<vk_specialization_type> specializations(1); | |||||
| specializations[0].i = num_slope; | |||||
| // pack1 | |||||
| if (num_slope == 1 || num_slope % 4 != 0) | |||||
| { | |||||
| pipeline_prelu = new Pipeline(vkdev); | |||||
| pipeline_prelu->set_optimal_local_size_xyz(8, 8, num_slope); | |||||
| pipeline_prelu->create("prelu", specializations, 2, 5); | |||||
| } | |||||
| // pack4 | |||||
| if (num_slope == 1 || num_slope % 4 == 0) | |||||
| { | |||||
| pipeline_prelu_pack4 = new Pipeline(vkdev); | |||||
| pipeline_prelu_pack4->set_optimal_local_size_xyz(8, 8, num_slope / 4); | |||||
| pipeline_prelu_pack4->create("prelu_pack4", specializations, 2, 5); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| int PReLU::destroy_pipeline() | |||||
| { | |||||
| delete pipeline_prelu; | |||||
| pipeline_prelu = 0; | |||||
| delete pipeline_prelu_pack4; | |||||
| pipeline_prelu_pack4 = 0; | |||||
| return 0; | |||||
| } | |||||
| int PReLU::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const | |||||
| { | |||||
| int packing = bottom_top_blob.packing; | |||||
| // fprintf(stderr, "PReLU::forward_inplace %p\n", bottom_top_blob.buffer()); | |||||
| std::vector<VkMat> bindings(2); | |||||
| bindings[0] = bottom_top_blob; | |||||
| bindings[1] = slope_data_gpu; | |||||
| std::vector<vk_constant_type> constants(5); | |||||
| constants[0].i = bottom_top_blob.dims; | |||||
| constants[1].i = bottom_top_blob.w; | |||||
| constants[2].i = bottom_top_blob.h; | |||||
| constants[3].i = bottom_top_blob.c; | |||||
| constants[4].i = bottom_top_blob.cstep; | |||||
| const Pipeline* pipeline = packing == 4 ? pipeline_prelu_pack4 : pipeline_prelu; | |||||
| // record | |||||
| cmd.record_prepare_compute_barrier(bottom_top_blob); | |||||
| cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob); | |||||
| return 0; | |||||
| } | |||||
| #endif // NCNN_VULKAN | |||||
| } // namespace ncnn | } // namespace ncnn | ||||
| @@ -30,9 +30,24 @@ public: | |||||
| virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; | virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; | ||||
| #if NCNN_VULKAN | |||||
| virtual int upload_model(VkTransfer& cmd); | |||||
| virtual int create_pipeline(); | |||||
| virtual int destroy_pipeline(); | |||||
| virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const; | |||||
| #endif // NCNN_VULKAN | |||||
| public: | public: | ||||
| int num_slope; | int num_slope; | ||||
| Mat slope_data; | Mat slope_data; | ||||
| #if NCNN_VULKAN | |||||
| VkMat slope_data_gpu; | |||||
| Pipeline* pipeline_prelu; | |||||
| Pipeline* pipeline_prelu_pack4; | |||||
| #endif // NCNN_VULKAN | |||||
| }; | }; | ||||
| } // namespace ncnn | } // namespace ncnn | ||||
| @@ -0,0 +1,86 @@ | |||||
| // Tencent is pleased to support the open source community by making ncnn available. | |||||
| // | |||||
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |||||
| // | |||||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||||
| // in compliance with the License. You may obtain a copy of the License at | |||||
| // | |||||
| // https://opensource.org/licenses/BSD-3-Clause | |||||
| // | |||||
| // Unless required by applicable law or agreed to in writing, software distributed | |||||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||||
| // specific language governing permissions and limitations under the License. | |||||
#version 450

// Scalar PReLU compute shader: each invocation rewrites one float of the
// blob in place, multiplying it by a slope when it is negative.

// Specialization constant 0: number of slope values. When it is greater
// than 1 the slope is picked per element (see main); otherwise slope 0 is
// shared by every element.
layout (constant_id = 0) const int num_slope = 0;

// Workgroup size is supplied through specialization constants 233/234/235.
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

layout (binding = 0) buffer bottom_top_blob { float bottom_top_blob_data[]; };
layout (binding = 1) readonly buffer slope_blob { float slope_blob_data[]; };

// Geometry of the blob being processed.
layout (push_constant) uniform parameter
{
    int dims;
    int w;
    int h;
    int c;
    int cstep;
} p;

void main()
{
    const ivec3 gid = ivec3(gl_GlobalInvocationID);

    // invocations outside the blob extent have nothing to do
    if (gid.x >= p.w || gid.y >= p.h || gid.z >= p.c)
        return;

    int elem;        // offset of the element in bottom_top_blob_data
    int slope_index; // slope used when num_slope > 1

    if (p.dims == 1)
    {
        elem = gid.x;
        slope_index = gid.x;
    }
    else if (p.dims == 2)
    {
        // rows are laid out back to back; the slope follows the row
        elem = gid.y * p.w + gid.x;
        slope_index = gid.y;
    }
    else if (p.dims == 3)
    {
        // channels are cstep elements apart; the slope follows the channel
        elem = gid.z * p.cstep + gid.y * p.w + gid.x;
        slope_index = gid.z;
    }
    else
    {
        // any other dims value is left untouched
        return;
    }

    float v = bottom_top_blob_data[elem];
    const float slope = slope_blob_data[num_slope > 1 ? slope_index : 0];

    if (v < 0)
        v = v * slope;

    bottom_top_blob_data[elem] = v;
}
| @@ -0,0 +1,86 @@ | |||||
| // Tencent is pleased to support the open source community by making ncnn available. | |||||
| // | |||||
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |||||
| // | |||||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||||
| // in compliance with the License. You may obtain a copy of the License at | |||||
| // | |||||
| // https://opensource.org/licenses/BSD-3-Clause | |||||
| // | |||||
| // Unless required by applicable law or agreed to in writing, software distributed | |||||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||||
| // specific language governing permissions and limitations under the License. | |||||
#version 450

// Packed PReLU compute shader: each invocation rewrites one vec4 of the
// blob in place, multiplying every negative lane by the matching slope lane.

// Specialization constant 0: number of slope values. When it is greater
// than 1 the slope vec4 is picked per element (see main); otherwise slope
// vec4 0 is shared by every element.
layout (constant_id = 0) const int num_slope = 0;

// Workgroup size is supplied through specialization constants 233/234/235.
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

layout (binding = 0) buffer bottom_top_blob { vec4 bottom_top_blob_data[]; };
layout (binding = 1) readonly buffer slope_blob { vec4 slope_blob_data[]; };

// Geometry of the blob being processed, counted in vec4 elements as far as
// the indexing below is concerned.
layout (push_constant) uniform parameter
{
    int dims;
    int w;
    int h;
    int c;
    int cstep;
} p;

void main()
{
    const ivec3 gid = ivec3(gl_GlobalInvocationID);

    // invocations outside the blob extent have nothing to do
    if (gid.x >= p.w || gid.y >= p.h || gid.z >= p.c)
        return;

    int elem;        // offset of the vec4 in bottom_top_blob_data
    int slope_index; // slope vec4 used when num_slope > 1

    if (p.dims == 1)
    {
        elem = gid.x;
        slope_index = gid.x;
    }
    else if (p.dims == 2)
    {
        // rows are laid out back to back; the slope follows the row
        elem = gid.y * p.w + gid.x;
        slope_index = gid.y;
    }
    else if (p.dims == 3)
    {
        // channels are cstep elements apart; the slope follows the channel
        elem = gid.z * p.cstep + gid.y * p.w + gid.x;
        slope_index = gid.z;
    }
    else
    {
        // any other dims value is left untouched
        return;
    }

    vec4 v = bottom_top_blob_data[elem];
    const vec4 slope = slope_blob_data[num_slope > 1 ? slope_index : 0];

    // per lane: keep v where it is >= 0, take v * slope where it is negative
    v = mix(v, v * slope, lessThan(v, vec4(0.0)));

    bottom_top_blob_data[elem] = v;
}