Browse Source

crop vulkan pack1to4 and pack4to1

tags/20190908
nihuini 6 years ago
parent
commit
6db731408c
4 changed files with 189 additions and 10 deletions
  1. +28
    -10
      src/layer/vulkan/crop_vulkan.cpp
  2. +2
    -0
      src/layer/vulkan/crop_vulkan.h
  3. +77
    -0
      src/layer/vulkan/shader/crop_pack1to4.comp
  4. +82
    -0
      src/layer/vulkan/shader/crop_pack4to1.comp

+ 28
- 10
src/layer/vulkan/crop_vulkan.cpp View File

@@ -25,6 +25,8 @@ Crop_vulkan::Crop_vulkan()

pipeline_crop = 0;
pipeline_crop_pack4 = 0;
pipeline_crop_pack1to4 = 0;
pipeline_crop_pack4to1 = 0;
}

int Crop_vulkan::create_pipeline(const Option& opt)
@@ -45,6 +47,20 @@ int Crop_vulkan::create_pipeline(const Option& opt)
pipeline_crop_pack4->create("crop_pack4", opt, specializations, 2, 13);
}

// pack1to4
{
pipeline_crop_pack1to4 = new Pipeline(vkdev);
pipeline_crop_pack1to4->set_optimal_local_size_xyz();
pipeline_crop_pack1to4->create("crop_pack1to4", opt, specializations, 2, 13);
}

// pack4to1
{
pipeline_crop_pack4to1 = new Pipeline(vkdev);
pipeline_crop_pack4to1->set_optimal_local_size_xyz();
pipeline_crop_pack4to1->create("crop_pack4to1", opt, specializations, 2, 13);
}

return 0;
}

@@ -56,6 +72,12 @@ int Crop_vulkan::destroy_pipeline(const Option& opt)
delete pipeline_crop_pack4;
pipeline_crop_pack4 = 0;

delete pipeline_crop_pack1to4;
pipeline_crop_pack1to4 = 0;

delete pipeline_crop_pack4to1;
pipeline_crop_pack4to1 = 0;

return 0;
}

@@ -167,19 +189,17 @@ int Crop_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& c
}
else if (elempack == 4 && out_elempack == 4)
{
constants[12].i = _coffset / 4;
constants[12].i = _coffset / 4;// TODO pack4to1to4

pipeline = pipeline_crop_pack4;
}
else if (elempack == 1 && out_elempack == 4)
{
// TODO
return -1;
pipeline = pipeline_crop_pack1to4;
}
else if (elempack == 4 && out_elempack == 1)
{
// TODO
return -1;
pipeline = pipeline_crop_pack4to1;
}

cmd.record_pipeline(pipeline, bindings, constants, top_blob);
@@ -266,19 +286,17 @@ int Crop_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkM
}
else if (elempack == 4 && out_elempack == 4)
{
constants[12].i = _coffset / 4;
constants[12].i = _coffset / 4;// TODO pack4to1to4

pipeline = pipeline_crop_pack4;
}
else if (elempack == 1 && out_elempack == 4)
{
// TODO
return -1;
pipeline = pipeline_crop_pack1to4;
}
else if (elempack == 4 && out_elempack == 1)
{
// TODO
return -1;
pipeline = pipeline_crop_pack4to1;
}

cmd.record_pipeline(pipeline, bindings, constants, top_blob);


+ 2
- 0
src/layer/vulkan/crop_vulkan.h View File

@@ -34,6 +34,8 @@ public:
public:
Pipeline* pipeline_crop;
Pipeline* pipeline_crop_pack4;
Pipeline* pipeline_crop_pack1to4;
Pipeline* pipeline_crop_pack4to1;
};

} // namespace ncnn


+ 77
- 0
src/layer/vulkan/shader/crop_pack1to4.comp View File

@@ -0,0 +1,77 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Optional extensions, switched on by NCNN_* macros that are not defined in
// this file (presumably injected by the shader build — TODO confirm).
#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_AMD_gpu_shader_half_float: require
#endif

// Workgroup dimensions are taken from specialization constants 233/234/235.
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

// Input: one storage value (sfp) per element.
// Output: one 4-component storage value (sfpvec4) per element.
// sfp / sfpvec4 are not declared in this file; presumably type macros chosen
// by the build according to the fp16 mode — TODO confirm.
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };

// Thirteen int push constants describing the input blob, the output blob and
// the crop origin. main() in this shader does not read dims, h, c or outdims.
layout (push_constant) uniform parameter
{
// input blob: w is the row stride and cstep the channel stride used by main()
int dims;
int w;
int h;
int c;
int cstep;

// output blob: outw/outh/outc bound the dispatch, outcstep is the channel
// stride; each output channel index covers four input channels in main()
int outdims;
int outw;
int outh;
int outc;
int outcstep;

// crop origin in the input; coffset is added to a scalar input channel index
int woffset;
int hoffset;
int coffset;
} p;

// Crop from a one-value-per-element input into a four-values-per-element
// output. Each invocation produces one output element by gathering the values
// of four consecutive input channels at the same cropped (x, y) position.
void main()
{
    ivec3 out_pos = ivec3(gl_GlobalInvocationID);

    // Invocations outside the output extent have nothing to write.
    if (any(greaterThanEqual(out_pos, ivec3(p.outw, p.outh, p.outc))))
        return;

    int dst = out_pos.z * p.outcstep + out_pos.y * p.outw + out_pos.x;

    // Offset of the source pixel inside a single input channel plane.
    int src_xy = (out_pos.y + p.hoffset) * p.w + (out_pos.x + p.woffset);

    // First of the four input channels feeding this output element.
    int src_c0 = out_pos.z * 4 + p.coffset;
    ivec4 src = (ivec4(src_c0) + ivec4(0, 1, 2, 3)) * p.cstep + ivec4(src_xy);

#if NCNN_fp16_packed
    // Packed layout: two half floats per 32-bit word, two words per element.
    uint lo = packHalf2x16(vec2(bottom_blob_data[src.x], bottom_blob_data[src.y]));
    uint hi = packHalf2x16(vec2(bottom_blob_data[src.z], bottom_blob_data[src.w]));

    top_blob_data[dst] = uvec2(lo, hi);
#else
    // Component-wise copy straight from storage to storage.
    top_blob_data[dst].x = bottom_blob_data[src.x];
    top_blob_data[dst].y = bottom_blob_data[src.y];
    top_blob_data[dst].z = bottom_blob_data[src.z];
    top_blob_data[dst].w = bottom_blob_data[src.w];
#endif
}

+ 82
- 0
src/layer/vulkan/shader/crop_pack4to1.comp View File

@@ -0,0 +1,82 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Optional extensions, switched on by NCNN_* macros that are not defined in
// this file (presumably injected by the shader build — TODO confirm).
#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_AMD_gpu_shader_half_float: require
#endif

// Workgroup dimensions are taken from specialization constants 233/234/235.
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

// Input: one 4-component storage value (sfpvec4) per element.
// Output: one storage value (sfp) per element.
// sfp / sfpvec4 are not declared in this file; presumably type macros chosen
// by the build according to the fp16 mode — TODO confirm.
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };

// Thirteen int push constants describing the input blob, the output blob and
// the crop origin. main() in this shader does not read dims, h, c or outdims.
layout (push_constant) uniform parameter
{
// input blob: w is the row stride; cstep is the stride between consecutive
// groups of four channels as indexed by main()
int dims;
int w;
int h;
int c;
int cstep;

// output blob: outw/outh/outc bound the dispatch, outcstep is the channel stride
int outdims;
int outw;
int outh;
int outc;
int outcstep;

// crop origin in the input; coffset is a scalar channel index that main()
// splits into a group index (/ 4) and a lane (% 4)
int woffset;
int hoffset;
int coffset;
} p;

// Crop from a four-values-per-element input into a one-value-per-element
// output. Each invocation writes one output value, taken from a single lane
// of the input element that holds the requested source channel.
void main()
{
    ivec3 out_pos = ivec3(gl_GlobalInvocationID);

    // Invocations outside the output extent have nothing to write.
    if (any(greaterThanEqual(out_pos, ivec3(p.outw, p.outh, p.outc))))
        return;

    int dst = out_pos.z * p.outcstep + out_pos.y * p.outw + out_pos.x;

    // Source channel as a scalar index; four channels share one input element.
    int src_c = out_pos.z + p.coffset;
    int lane = src_c % 4;
    int src = (src_c / 4) * p.cstep
            + (out_pos.y + p.hoffset) * p.w
            + (out_pos.x + p.woffset);

#if NCNN_fp16_packed
    // Unpack the whole element once, then pick the lane.
    vec4 texel = sfp2afpvec4(bottom_blob_data[src]);

    switch (lane)
    {
    case 0:
        top_blob_data[dst] = texel.x;
        break;
    case 1:
        top_blob_data[dst] = texel.y;
        break;
    case 2:
        top_blob_data[dst] = texel.z;
        break;
    default: // lane 3, and any other value, selects the last component
        top_blob_data[dst] = texel.w;
        break;
    }
#else
    // Read the selected component straight from storage.
    switch (lane)
    {
    case 0:
        top_blob_data[dst] = bottom_blob_data[src].x;
        break;
    case 1:
        top_blob_data[dst] = bottom_blob_data[src].y;
        break;
    case 2:
        top_blob_data[dst] = bottom_blob_data[src].z;
        break;
    default: // lane 3, and any other value, selects the last component
        top_blob_data[dst] = bottom_blob_data[src].w;
        break;
    }
#endif
}

Loading…
Cancel
Save