Browse Source

reshape shader family

tags/20190320
nihui 7 years ago
parent
commit
69788b0467
7 changed files with 566 additions and 2 deletions
  1. +209
    -0
      src/layer/reshape.cpp
  2. +14
    -0
      src/layer/reshape.h
  3. +0
    -2
      src/layer/shader/priorbox_mxnet.comp
  4. +69
    -0
      src/layer/shader/reshape.comp
  5. +73
    -0
      src/layer/shader/reshape_pack1to4.comp
  6. +115
    -0
      src/layer/shader/reshape_pack4.comp
  7. +86
    -0
      src/layer/shader/reshape_pack4to1.comp

+ 209
- 0
src/layer/reshape.cpp View File

@@ -22,6 +22,14 @@ Reshape::Reshape()
{
// Layer capability flags set for Reshape.
one_blob_only = true;
support_inplace = false;
// NOTE(review): set unconditionally, even when NCNN_VULKAN is disabled — confirm this is intended.
support_vulkan = true;

#if NCNN_VULKAN
// Pipelines start out null; create_pipeline() allocates them and
// destroy_pipeline() deletes them and resets the pointers to null.
pipeline_reshape = 0;
pipeline_reshape_pack4 = 0;
pipeline_reshape_pack1to4 = 0;
pipeline_reshape_pack4to1 = 0;
#endif // NCNN_VULKAN
}

int Reshape::load_param(const ParamDict& pd)
@@ -128,4 +136,205 @@ int Reshape::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) c
return 0;
}

#if NCNN_VULKAN
int Reshape::create_pipeline()
{
std::vector<vk_specialization_type> specializations(1);
specializations[0].i = ndim;

// pack1
{
pipeline_reshape = new Pipeline(vkdev);
pipeline_reshape->set_optimal_local_size_xyz();
pipeline_reshape->create("reshape", specializations, 2, 10);
}

// pack4
{
pipeline_reshape_pack4 = new Pipeline(vkdev);
pipeline_reshape_pack4->set_optimal_local_size_xyz();
pipeline_reshape_pack4->create("reshape_pack4", specializations, 2, 10);
}

// pack1to4
{
pipeline_reshape_pack1to4 = new Pipeline(vkdev);
pipeline_reshape_pack1to4->set_optimal_local_size_xyz();
pipeline_reshape_pack1to4->create("reshape_pack1to4", specializations, 2, 10);
}

// pack4to1
{
pipeline_reshape_pack4to1 = new Pipeline(vkdev);
pipeline_reshape_pack4to1->set_optimal_local_size_xyz();
pipeline_reshape_pack4to1->create("reshape_pack4to1", specializations, 2, 10);
}

return 0;
}

int Reshape::destroy_pipeline()
{
delete pipeline_reshape;
pipeline_reshape = 0;

delete pipeline_reshape_pack4;
pipeline_reshape_pack4 = 0;

delete pipeline_reshape_pack1to4;
pipeline_reshape_pack1to4 = 0;

delete pipeline_reshape_pack4to1;
pipeline_reshape_pack4to1 = 0;

return 0;
}

// Vulkan forward pass: reshape bottom_blob into the shape described by
// (w, h, c, ndim) and record the copy on cmd.
//
// A target dimension of 0 is copied from the input and -1 is inferred from
// the total element count. The output is packed by 4 along its outermost
// dimension when that dimension is a multiple of 4, otherwise it is unpacked.
//
// Returns 0 on success, -100 if the output blob could not be allocated, and
// -1 if no pipeline exists for the input/output packing combination.
//
// NOTE(review): the permute flag is not honoured on this path (see TODO below).
int Reshape::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const
{
    const int dims = bottom_blob.dims;
    const size_t elemsize = bottom_blob.elemsize;
    const int packing = bottom_blob.packing;
    int out_packing;

    // number of scalar elements, with the packing expanded
    const int total = bottom_blob.w * bottom_blob.h * bottom_blob.c * packing;

    if (ndim == 1)
    {
        int _w = w;

        if (_w == 0)
            _w = dims == 1 ? bottom_blob.w * packing : bottom_blob.w;

        if (_w == -1)
            _w = total;

        // TODO permute support

        out_packing = _w % 4 == 0 ? 4 : 1;
        size_t out_elemsize = elemsize / packing * out_packing;

        // Identity reshape: share the buffer instead of copying.
        // bottom_blob.w is stored in packed units while _w is unpacked, so
        // the target has to be divided by the packing before comparing.
        if (dims == 1 && packing == out_packing && bottom_blob.w == _w / out_packing)
        {
            top_blob = bottom_blob;
            return 0;
        }

        top_blob.create(_w / out_packing, out_elemsize, out_packing, opt.blob_vkallocator, opt.staging_vkallocator);
    }
    else if (ndim == 2)
    {
        int _w = w;
        int _h = h;

        if (_w == 0)
            _w = dims == 1 ? bottom_blob.w * packing : bottom_blob.w;
        if (_h == 0)
            _h = dims == 2 ? bottom_blob.h * packing : bottom_blob.h;

        if (_w == -1)
            _w = total / _h;
        if (_h == -1)
            _h = total / _w;

        out_packing = _h % 4 == 0 ? 4 : 1;
        size_t out_elemsize = elemsize / packing * out_packing;

        // Identity reshape: same packed height (and therefore same width,
        // since the element count is preserved) -> share the buffer.
        if (dims == 2 && packing == out_packing && bottom_blob.h == _h / out_packing)
        {
            top_blob = bottom_blob;
            return 0;
        }

        top_blob.create(_w, _h / out_packing, out_elemsize, out_packing, opt.blob_vkallocator, opt.staging_vkallocator);
    }
    else // if (ndim == 3)
    {
        int _w = w;
        int _h = h;
        int _c = c;

        if (_w == 0)
            _w = dims == 1 ? bottom_blob.w * packing : bottom_blob.w;
        if (_h == 0)
            _h = dims == 2 ? bottom_blob.h * packing : bottom_blob.h;
        if (_c == 0)
            _c = dims == 3 ? bottom_blob.c * packing : bottom_blob.c;

        if (_w == -1)
            _w = total / _c / _h;
        if (_h == -1)
            _h = total / _c / _w;
        if (_c == -1)
            _c = total / _h / _w;

        out_packing = _c % 4 == 0 ? 4 : 1;
        size_t out_elemsize = elemsize / packing * out_packing;

        // Same packed channel count: only w/h change within each channel,
        // so the existing buffer can be reused with the new w and h.
        if (dims == 3 && packing == out_packing && bottom_blob.c == _c / out_packing)
        {
            top_blob = bottom_blob;
            top_blob.w = _w;
            top_blob.h = _h;
            return 0;
        }

        top_blob.create(_w, _h, _c / out_packing, out_elemsize, out_packing, opt.blob_vkallocator, opt.staging_vkallocator);
    }

    if (top_blob.empty())
        return -100;

    // fprintf(stderr, "Reshape::forward %p %p\n", bottom_blob.buffer(), top_blob.buffer());

    // binding 0 = input buffer, binding 1 = output buffer
    std::vector<VkMat> bindings(2);
    bindings[0] = bottom_blob;
    bindings[1] = top_blob;

    // push constants: input shape in slots 0-4, output shape in slots 5-9
    std::vector<vk_constant_type> constants(10);
    constants[0].i = bottom_blob.dims;
    constants[1].i = bottom_blob.w;
    constants[2].i = bottom_blob.h;
    constants[3].i = bottom_blob.c;
    constants[4].i = bottom_blob.cstep;
    constants[5].i = top_blob.dims;
    constants[6].i = top_blob.w;
    constants[7].i = top_blob.h;
    constants[8].i = top_blob.c;
    constants[9].i = top_blob.cstep;

    const Pipeline* pipeline = 0;
    if (packing == 1 && out_packing == 1)
    {
        pipeline = pipeline_reshape;
    }
    else if (packing == 4 && out_packing == 4)
    {
        pipeline = pipeline_reshape_pack4;
    }
    else if (packing == 1 && out_packing == 4)
    {
        pipeline = pipeline_reshape_pack1to4;
    }
    else if (packing == 4 && out_packing == 1)
    {
        pipeline = pipeline_reshape_pack4to1;
    }

    // Unsupported packing combination, or create_pipeline() was never run:
    // fail cleanly instead of recording a null pipeline.
    if (!pipeline)
        return -1;

    // record
    cmd.record_prepare_compute_barrier(bottom_blob);
    cmd.record_prepare_compute_barrier(top_blob);

    // The pack4to1 shader iterates over the input elements (it bounds-checks
    // against the input shape); every other variant iterates over the output.
    if (packing == 4 && out_packing == 1)
    {
        cmd.record_pipeline(pipeline, bindings, constants, bottom_blob);
    }
    else
    {
        cmd.record_pipeline(pipeline, bindings, constants, top_blob);
    }

    return 0;
}
#endif // NCNN_VULKAN

} // namespace ncnn

+ 14
- 0
src/layer/reshape.h View File

@@ -28,6 +28,13 @@ public:

virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

#if NCNN_VULKAN
// Allocates the four reshape compute pipelines (pack1, pack4, pack1to4, pack4to1).
virtual int create_pipeline();
// Deletes the pipelines created by create_pipeline() and nulls the pointers.
virtual int destroy_pipeline();

// Vulkan forward pass: computes the output shape, allocates top_blob and
// records the matching reshape pipeline on cmd.
virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const;
#endif // NCNN_VULKAN

private:
// reshape flag
// 0 = copy from bottom
@@ -38,6 +45,13 @@ private:
int c;
int permute;
int ndim;

#if NCNN_VULKAN
// Compute pipelines, one per (input packing, output packing) combination.
// Null until create_pipeline() runs and again after destroy_pipeline().
Pipeline* pipeline_reshape;          // packing 1 -> 1
Pipeline* pipeline_reshape_pack4;    // packing 4 -> 4
Pipeline* pipeline_reshape_pack1to4; // packing 1 -> 4
Pipeline* pipeline_reshape_pack4to1; // packing 4 -> 1
#endif // NCNN_VULKAN
};

} // namespace ncnn


+ 0
- 2
src/layer/shader/priorbox_mxnet.comp View File

@@ -71,8 +71,6 @@ void main()
float ratio = sqrt(aspect_ratios_data[pi]);
float cwr = size * p.h / p.w * ratio / 2;
float chr = size / ratio / 2;
// float cwr = cw * ratio;
// float chr = ch / ratio;

vec4 box = center + vec4(-cwr, -chr, cwr, chr);



+ 69
- 0
src/layer/shader/reshape.comp View File

@@ -0,0 +1,69 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Specialization constant 0: rank of the output shape; main() handles 1, 2 and 3.
layout (constant_id = 0) const int ndim = 0;

// Workgroup size comes from specialization constants 233/234/235.
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

// Input and output are both arrays of scalar floats (unpacked).
layout (binding = 0) readonly buffer bottom_blob { float bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { float top_blob_data[]; };

// Push constants: input shape first, then output shape.
// cstep / outcstep are used as the stride between consecutive channels.
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;

int outdims;
int outw;
int outh;
int outc;
int outcstep;
} p;

void main()
{
    // One invocation per output element.
    const ivec3 gid = ivec3(gl_GlobalInvocationID);

    if (any(greaterThanEqual(gid, ivec3(p.outw, p.outh, p.outc))))
        return;

    // linear: position of the element in the logical (stride-free) ordering
    // dst:    offset of the element inside the output buffer
    int linear;
    int dst;

    if (ndim == 1)
    {
        linear = gid.x;
        dst = gid.x;
    }
    else if (ndim == 2)
    {
        linear = gid.y * p.outw + gid.x;
        dst = gid.y * p.outw + gid.x;
    }
    else if (ndim == 3)
    {
        linear = gid.z * p.outh * p.outw + gid.y * p.outw + gid.x;
        dst = gid.z * p.outcstep + gid.y * p.outw + gid.x;
    }
    else
    {
        // no store is performed for any other rank
        return;
    }

    // Decompose the linear position into input (x, y, z) coordinates.
    const int plane = p.w * p.h;
    const int in_plane = linear % plane;

    const int src_z = linear / plane;
    const int src_y = in_plane / p.w;
    const int src_x = in_plane % p.w;

    const int src = src_z * p.cstep + src_y * p.w + src_x;

    top_blob_data[dst] = bottom_blob_data[src];
}

+ 73
- 0
src/layer/shader/reshape_pack1to4.comp View File

@@ -0,0 +1,73 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Specialization constant 0: rank of the output shape; main() handles 1, 2 and 3.
layout (constant_id = 0) const int ndim = 0;

// Workgroup size comes from specialization constants 233/234/235.
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

// Input is an array of scalar floats; output is an array of vec4 (4 values per element).
layout (binding = 0) readonly buffer bottom_blob { float bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { vec4 top_blob_data[]; };

// Push constants: input shape first, then output shape (output sizes are in vec4 units).
// cstep / outcstep are used as the stride between consecutive channels.
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;

int outdims;
int outw;
int outh;
int outc;
int outcstep;
} p;

void main()
{
    // One invocation per output vec4.
    const ivec3 gid = ivec3(gl_GlobalInvocationID);

    if (any(greaterThanEqual(gid, ivec3(p.outw, p.outh, p.outc))))
        return;

    const ivec4 lanes = ivec4(0, 1, 2, 3);

    // linear4: logical positions of the four scalars packed into this vec4
    // dst:     offset of the vec4 inside the output buffer
    ivec4 linear4;
    int dst;

    if (ndim == 1)
    {
        linear4 = gid.x * 4 + lanes;
        dst = gid.x;
    }
    else if (ndim == 2)
    {
        // packed along h: the four lanes are four consecutive rows
        linear4 = (gid.y * 4) * p.outw + gid.x + lanes * p.outw;
        dst = gid.y * p.outw + gid.x;
    }
    else if (ndim == 3)
    {
        // packed along c: the four lanes are four consecutive channels
        linear4 = (gid.z * 4) * p.outh * p.outw + gid.y * p.outw + gid.x + lanes * p.outh * p.outw;
        dst = gid.z * p.outcstep + gid.y * p.outw + gid.x;
    }
    else
    {
        // no store is performed for any other rank
        return;
    }

    // Map each logical position back to an offset in the unpacked input.
    const int plane = p.w * p.h;
    const ivec4 in_plane4 = linear4 % plane;

    const ivec4 src_z4 = linear4 / plane;
    const ivec4 src_y4 = in_plane4 / p.w;
    const ivec4 src_x4 = in_plane4 % p.w;

    const ivec4 src4 = src_z4 * p.cstep + src_y4 * p.w + src_x4;

    vec4 gathered;
    gathered.r = bottom_blob_data[src4.r];
    gathered.g = bottom_blob_data[src4.g];
    gathered.b = bottom_blob_data[src4.b];
    gathered.a = bottom_blob_data[src4.a];

    top_blob_data[dst] = gathered;
}

+ 115
- 0
src/layer/shader/reshape_pack4.comp View File

@@ -0,0 +1,115 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Specialization constant 0: rank of the output shape; main() handles 1, 2 and 3.
layout (constant_id = 0) const int ndim = 0;

// Workgroup size comes from specialization constants 233/234/235.
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

// Input and output are both arrays of vec4 (4 values per element).
layout (binding = 0) readonly buffer bottom_blob { vec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { vec4 top_blob_data[]; };

// Push constants: input shape first, then output shape (sizes are in vec4 units).
// cstep / outcstep are used as the stride between consecutive channels.
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;

int outdims;
int outw;
int outh;
int outc;
int outcstep;
} p;

// Returns component `lane` of v for lane 0..2; any other lane yields v.a.
float select_lane(vec4 v, int lane)
{
    if (lane == 0) return v.r;
    if (lane == 1) return v.g;
    if (lane == 2) return v.b;
    return v.a;
}

void main()
{
    // One invocation per output vec4.
    const ivec3 gid = ivec3(gl_GlobalInvocationID);

    if (any(greaterThanEqual(gid, ivec3(p.outw, p.outh, p.outc))))
        return;

    const ivec4 lanes = ivec4(0, 1, 2, 3);

    // linear4: logical positions of the four scalars packed into this vec4
    // dst:     offset of the vec4 inside the output buffer
    ivec4 linear4;
    int dst;

    if (ndim == 1)
    {
        linear4 = gid.x * 4 + lanes;
        dst = gid.x;
    }
    else if (ndim == 2)
    {
        linear4 = (gid.y * 4) * p.outw + gid.x + lanes * p.outw;
        dst = gid.y * p.outw + gid.x;
    }
    else if (ndim == 3)
    {
        linear4 = (gid.z * 4) * p.outh * p.outw + gid.y * p.outw + gid.x + lanes * p.outh * p.outw;
        dst = gid.z * p.outcstep + gid.y * p.outw + gid.x;
    }
    else
    {
        // no store is performed for any other rank
        return;
    }

    // For each scalar, find the input vec4 that holds it (src4) and the
    // component within that vec4 (src_lane4). The packed axis depends on
    // the rank of the input.
    ivec4 src4;
    ivec4 src_lane4;

    if (p.dims == 1)
    {
        // packed along w
        src4 = linear4 / 4;
        src_lane4 = linear4 % 4;
    }
    else if (p.dims == 2)
    {
        // packed along h
        const ivec4 row4 = linear4 / p.w;
        const ivec4 col4 = linear4 % p.w;

        src4 = (row4 / 4) * p.w + col4;
        src_lane4 = row4 % 4;
    }
    else // if (p.dims == 3)
    {
        // packed along c
        const int plane = p.w * p.h;
        const ivec4 in_plane4 = linear4 % plane;

        const ivec4 chan4 = linear4 / plane;
        const ivec4 row4 = in_plane4 / p.w;
        const ivec4 col4 = in_plane4 % p.w;

        src4 = (chan4 / 4) * p.cstep + row4 * p.w + col4;
        src_lane4 = chan4 % 4;
    }

    vec4 gathered;
    gathered.r = select_lane(bottom_blob_data[src4.r], src_lane4.r);
    gathered.g = select_lane(bottom_blob_data[src4.g], src_lane4.g);
    gathered.b = select_lane(bottom_blob_data[src4.b], src_lane4.b);
    gathered.a = select_lane(bottom_blob_data[src4.a], src_lane4.a);

    top_blob_data[dst] = gathered;
}

+ 86
- 0
src/layer/shader/reshape_pack4to1.comp View File

@@ -0,0 +1,86 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Specialization constant 0: rank of the output shape; main() handles 1, 2 and 3.
layout (constant_id = 0) const int ndim = 0;

// Workgroup size comes from specialization constants 233/234/235.
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

// Input is an array of vec4 (4 values per element); output is an array of scalar floats.
layout (binding = 0) readonly buffer bottom_blob { vec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { float top_blob_data[]; };

// Push constants: input shape first (sizes in vec4 units), then output shape.
// cstep / outcstep are used as the stride between consecutive channels.
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;

int outdims;
int outw;
int outh;
int outc;
int outcstep;
} p;

void main()
{
    // One invocation per INPUT vec4: this variant scatters, so the bounds
    // check is against the input shape rather than the output shape.
    const ivec3 gid = ivec3(gl_GlobalInvocationID);

    if (any(greaterThanEqual(gid, ivec3(p.w, p.h, p.c))))
        return;

    const ivec4 lanes = ivec4(0, 1, 2, 3);

    // Logical positions of the four scalars held by this input vec4.
    ivec4 linear4;

    if (p.dims == 1)
    {
        linear4 = gid.x * 4 + lanes;
    }
    else if (p.dims == 2)
    {
        linear4 = (gid.y * 4) * p.w + gid.x + lanes * p.w;
    }
    else if (p.dims == 3)
    {
        linear4 = (gid.z * 4) * p.h * p.w + gid.y * p.w + gid.x + lanes * p.h * p.w;
    }

    // Offsets of those four scalars in the unpacked output.
    ivec4 dst4;

    if (ndim == 1)
    {
        dst4 = linear4;
    }
    else if (ndim == 2)
    {
        const ivec4 row4 = linear4 / p.outw;
        const ivec4 col4 = linear4 % p.outw;

        dst4 = row4 * p.outw + col4;
    }
    else // if (ndim == 3)
    {
        const int out_plane = p.outw * p.outh;
        const ivec4 in_plane4 = linear4 % out_plane;

        const ivec4 chan4 = linear4 / out_plane;
        const ivec4 row4 = in_plane4 / p.outw;
        const ivec4 col4 = in_plane4 % p.outw;

        dst4 = chan4 * p.outcstep + row4 * p.outw + col4;
    }

    const vec4 packed = bottom_blob_data[gid.z * p.cstep + gid.y * p.w + gid.x];

    top_blob_data[dst4.r] = packed.r;
    top_blob_data[dst4.g] = packed.g;
    top_blob_data[dst4.b] = packed.b;
    top_blob_data[dst4.a] = packed.a;
}

Loading…
Cancel
Save