Browse Source

prelu shader

tags/20190320
nihui 7 years ago
parent
commit
dd83284cee
4 changed files with 273 additions and 0 deletions
  1. +86
    -0
      src/layer/prelu.cpp
  2. +15
    -0
      src/layer/prelu.h
  3. +86
    -0
      src/layer/shader/prelu.comp
  4. +86
    -0
      src/layer/shader/prelu_pack4.comp

+ 86
- 0
src/layer/prelu.cpp View File

@@ -22,6 +22,12 @@ PReLU::PReLU()
{
// layer capability flags; they are consumed by framework code outside this view
one_blob_only = true;
support_inplace = true;
support_vulkan = true;

#if NCNN_VULKAN
// both pipelines start out null: create_pipeline() allocates them on demand
// and destroy_pipeline() resets them back to 0
pipeline_prelu = 0;
pipeline_prelu_pack4 = 0;
#endif // NCNN_VULKAN
}

int PReLU::load_param(const ParamDict& pd)
@@ -115,4 +121,84 @@ int PReLU::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
return 0;
}

#if NCNN_VULKAN
// Record the upload of the slope table into slope_data_gpu.
//
// cmd  transfer recorder that receives the upload
//
// Always returns 0.
int PReLU::upload_model(VkTransfer& cmd)
{
    // more than one slope: the table is uploaded unchanged
    if (num_slope != 1)
    {
        cmd.record_upload(slope_data, slope_data_gpu);
        return 0;
    }

    // one shared slope: replicate it into four floats so that the pack4
    // shader can read element 0 of the table as a full vec4
    Mat replicated_slope(4);
    replicated_slope.fill(slope_data[0]);
    cmd.record_upload(replicated_slope, slope_data_gpu);

    return 0;
}

// Build the compute pipelines this layer can be dispatched with.
//
// The scalar "prelu" pipeline is built when num_slope is 1 or not a multiple
// of 4; the "prelu_pack4" pipeline is built when num_slope is 1 or a multiple
// of 4. With num_slope == 1 both are built.
//
// Always returns 0.
int PReLU::create_pipeline()
{
    // num_slope is the only specialization value; both shaders declare it
    // as constant_id 0
    std::vector<vk_specialization_type> specializations(1);
    specializations[0].i = num_slope;

    const bool single_slope = (num_slope == 1);
    const bool multiple_of_four = (num_slope % 4 == 0);

    // scalar layout
    if (single_slope || !multiple_of_four)
    {
        pipeline_prelu = new Pipeline(vkdev);
        pipeline_prelu->set_optimal_local_size_xyz(8, 8, num_slope);
        // 2 and 5 match the two buffer bindings and the five push constants
        // that forward_inplace supplies
        pipeline_prelu->create("prelu", specializations, 2, 5);
    }

    // 4-packed layout
    if (single_slope || multiple_of_four)
    {
        pipeline_prelu_pack4 = new Pipeline(vkdev);
        pipeline_prelu_pack4->set_optimal_local_size_xyz(8, 8, num_slope / 4);
        pipeline_prelu_pack4->create("prelu_pack4", specializations, 2, 5);
    }

    return 0;
}

int PReLU::destroy_pipeline()
{
delete pipeline_prelu;
pipeline_prelu = 0;

delete pipeline_prelu_pack4;
pipeline_prelu_pack4 = 0;

return 0;
}

// Record the in-place PReLU dispatch for a GPU blob.
//
// bottom_top_blob  blob that the shader reads and overwrites
// cmd              compute recorder that receives the barrier and the dispatch
// opt              not used by this function
//
// Returns 0 once the dispatch has been recorded, or -1 when the pipeline
// that matches the blob packing was never created.
int PReLU::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const
{
    const int packing = bottom_top_blob.packing;

    // create_pipeline() builds only one of the two pipelines when
    // num_slope > 1, so the one selected by the blob packing can still be
    // null; bail out before anything is recorded instead of handing a null
    // pipeline to the recorder
    const Pipeline* pipeline = packing == 4 ? pipeline_prelu_pack4 : pipeline_prelu;
    if (!pipeline)
        return -1;

    // binding 0 is the blob itself, binding 1 the uploaded slope table
    std::vector<VkMat> bindings(2);
    bindings[0] = bottom_top_blob;
    bindings[1] = slope_data_gpu;

    // push constants, in the order the shaders declare them:
    // dims, w, h, c, cstep
    std::vector<vk_constant_type> constants(5);
    constants[0].i = bottom_top_blob.dims;
    constants[1].i = bottom_top_blob.w;
    constants[2].i = bottom_top_blob.h;
    constants[3].i = bottom_top_blob.c;
    constants[4].i = bottom_top_blob.cstep;

    // record
    cmd.record_prepare_compute_barrier(bottom_top_blob);
    cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);

    return 0;
}
#endif // NCNN_VULKAN

} // namespace ncnn

+ 15
- 0
src/layer/prelu.h View File

@@ -30,9 +30,24 @@ public:

virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;

#if NCNN_VULKAN
// Records the upload of slope_data into slope_data_gpu on cmd. A single
// shared slope is first replicated into four floats for the pack4 shader.
virtual int upload_model(VkTransfer& cmd);

// Creates the "prelu" and/or "prelu_pack4" pipeline depending on num_slope.
virtual int create_pipeline();
// Deletes both pipelines and resets the pointers to 0.
virtual int destroy_pipeline();

// Records the in-place PReLU dispatch for bottom_top_blob on cmd, using the
// pack4 pipeline when bottom_top_blob.packing == 4 and the scalar one otherwise.
virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt) const;
#endif // NCNN_VULKAN

public:
// number of slope values; the shaders use a per-index slope only when this
// is greater than 1, otherwise the first slope applies to every element
int num_slope;
// slope values on the host side
// NOTE(review): presumably holds num_slope entries - the loading code is outside this view
Mat slope_data;

#if NCNN_VULKAN
// device-side slope table, filled by upload_model()
VkMat slope_data_gpu;
// pipeline for the scalar "prelu" shader; 0 while not created
Pipeline* pipeline_prelu;
// pipeline for the "prelu_pack4" shader; 0 while not created
Pipeline* pipeline_prelu_pack4;
#endif // NCNN_VULKAN
};

} // namespace ncnn


+ 86
- 0
src/layer/shader/prelu.comp View File

@@ -0,0 +1,86 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Number of slope values, specialization constant 0.
// main() indexes the slope table only when this is greater than 1;
// otherwise slope_blob_data[0] is applied to every element.
layout (constant_id = 0) const int num_slope = 0;

// Workgroup size is taken from specialization constants 233/234/235.
// NOTE(review): presumably filled in by the host-side pipeline setup - confirm.
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

// binding 0: the blob, read and written in place, one float per element
layout (binding = 0) buffer bottom_top_blob { float bottom_top_blob_data[]; };
// binding 1: the slope table, read-only
layout (binding = 1) readonly buffer slope_blob { float slope_blob_data[]; };

// Blob geometry supplied per dispatch.
layout (push_constant) uniform parameter
{
// rank of the blob; main() handles 1, 2 and 3
int dims;
// extents checked against the global invocation id in x, y and z
int w;
int h;
int c;
// element stride between consecutive z slices (used as gz * cstep)
int cstep;
} p;

// One invocation handles one float of the blob: negative values are scaled
// by the slope selected for that element, other values are written back as is.
void main()
{
    const int x = int(gl_GlobalInvocationID.x);
    const int y = int(gl_GlobalInvocationID.y);
    const int z = int(gl_GlobalInvocationID.z);

    // invocations outside the blob extent have nothing to do
    if (x >= p.w || y >= p.h || z >= p.c)
        return;

    // the element offset and the slope index both depend on the blob rank
    int offset;
    int slope_index;

    if (p.dims == 1)
    {
        offset = x;
        slope_index = x;
    }
    else if (p.dims == 2)
    {
        offset = y * p.w + x;
        slope_index = y;
    }
    else if (p.dims == 3)
    {
        offset = z * p.cstep + y * p.w + x;
        slope_index = z;
    }
    else
    {
        // any other rank is left untouched
        return;
    }

    float v = bottom_top_blob_data[offset];

    // with a single slope the first table entry is shared by every element
    const float slope = slope_blob_data[num_slope > 1 ? slope_index : 0];

    if (v < 0)
        v = v * slope;

    bottom_top_blob_data[offset] = v;
}

+ 86
- 0
src/layer/shader/prelu_pack4.comp View File

@@ -0,0 +1,86 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Number of slope values, specialization constant 0.
// main() indexes the slope table only when this is greater than 1;
// otherwise slope_blob_data[0] is applied to every element.
layout (constant_id = 0) const int num_slope = 0;

// Workgroup size is taken from specialization constants 233/234/235.
// NOTE(review): presumably filled in by the host-side pipeline setup - confirm.
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

// binding 0: the blob, read and written in place, one vec4 per element
layout (binding = 0) buffer bottom_top_blob { vec4 bottom_top_blob_data[]; };
// binding 1: the slope table, read-only, addressed in vec4 units
layout (binding = 1) readonly buffer slope_blob { vec4 slope_blob_data[]; };

// Blob geometry supplied per dispatch; offsets built from it index vec4 elements.
layout (push_constant) uniform parameter
{
// rank of the blob; main() handles 1, 2 and 3
int dims;
// extents checked against the global invocation id in x, y and z
int w;
int h;
int c;
// element stride between consecutive z slices (used as gz * cstep)
int cstep;
} p;

// One invocation handles one vec4 of the blob: each negative component is
// scaled by the matching slope component, other components are written back as is.
void main()
{
    const int x = int(gl_GlobalInvocationID.x);
    const int y = int(gl_GlobalInvocationID.y);
    const int z = int(gl_GlobalInvocationID.z);

    // invocations outside the blob extent have nothing to do
    if (x >= p.w || y >= p.h || z >= p.c)
        return;

    // the element offset and the slope index both depend on the blob rank
    int offset;
    int slope_index;

    if (p.dims == 1)
    {
        offset = x;
        slope_index = x;
    }
    else if (p.dims == 2)
    {
        offset = y * p.w + x;
        slope_index = y;
    }
    else if (p.dims == 3)
    {
        offset = z * p.cstep + y * p.w + x;
        slope_index = z;
    }
    else
    {
        // any other rank is left untouched
        return;
    }

    vec4 v = bottom_top_blob_data[offset];

    // with a single slope the first table entry is shared by every element
    const vec4 slope = slope_blob_data[num_slope > 1 ? slope_index : 0];

    // component-wise select: take v * slope where the component is negative
    const bvec4 is_negative = lessThan(v, vec4(0.0));
    v = mix(v, v * slope, is_negative);

    bottom_top_blob_data[offset] = v;
}

Loading…
Cancel
Save