Browse Source

priorbox shader, fix permute order 1 on image, fix potential staging memory leak

tags/20190320
nihui 7 years ago
parent
commit
c41bcd98a3
6 changed files with 420 additions and 1 deletions
  1. +188
    -0
      src/layer/priorbox.cpp
  2. +17
    -0
      src/layer/priorbox.h
  3. +1
    -1
      src/layer/shader/permute_pack4to1.comp
  4. +131
    -0
      src/layer/shader/priorbox.comp
  5. +82
    -0
      src/layer/shader/priorbox_mxnet.comp
  6. +1
    -0
      src/mat.h

+ 188
- 0
src/layer/priorbox.cpp View File

@@ -24,6 +24,12 @@ PriorBox::PriorBox()
{
// Layer capability flags; their exact meaning is defined by the base Layer class (not visible here).
one_blob_only = false;
support_inplace = false;
// Advertise the Vulkan forward() implemented in the NCNN_VULKAN section of this file.
support_vulkan = true;

#if NCNN_VULKAN
// Both pipelines start out null; create_pipeline() allocates them and destroy_pipeline() deletes them.
pipeline_priorbox = 0;
pipeline_priorbox_mxnet = 0;
#endif // NCNN_VULKAN
}

int PriorBox::load_param(const ParamDict& pd)
@@ -250,4 +256,186 @@ int PriorBox::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to
return 0;
}

#if NCNN_VULKAN
// Record the uploads of the layer's size/ratio tables into their GPU-side
// counterparts. Always returns 0.
int PriorBox::upload_model(VkTransfer& cmd)
{
    // min sizes are uploaded unconditionally
    cmd.record_upload(min_sizes, min_sizes_gpu);

    // max sizes are optional: nothing is uploaded when the table is empty
    const bool has_max_sizes = max_sizes.w > 0;
    if (has_max_sizes)
    {
        cmd.record_upload(max_sizes, max_sizes_gpu);
    }

    cmd.record_upload(aspect_ratios, aspect_ratios_gpu);

    return 0;
}

int PriorBox::create_pipeline()
{
// caffe style
{
int num_min_size = min_sizes.w;
int num_max_size = max_sizes.w;
int num_aspect_ratio = aspect_ratios.w;

int num_prior = num_min_size * num_aspect_ratio + num_min_size + num_max_size;
if (flip)
num_prior += num_min_size * num_aspect_ratio;

std::vector<vk_specialization_type> specializations(11);
specializations[0].i = flip;
specializations[1].i = clip;
specializations[2].f = offset;
specializations[3].f = variances[0];
specializations[4].f = variances[1];
specializations[5].f = variances[2];
specializations[6].f = variances[3];
specializations[7].i = num_min_size;
specializations[8].i = num_max_size;
specializations[9].i = num_aspect_ratio;
specializations[10].i = num_prior;

pipeline_priorbox = new Pipeline(vkdev);
pipeline_priorbox->set_optimal_local_size_xyz();
pipeline_priorbox->create("priorbox", specializations, 4, 6);
}

// mxnet style
{
int num_sizes = min_sizes.w;
int num_ratios = aspect_ratios.w;

int num_prior = num_sizes - 1 + num_ratios;

std::vector<vk_specialization_type> specializations(5);
specializations[0].i = clip;
specializations[1].f = offset;
specializations[2].i = num_sizes;
specializations[3].i = num_ratios;
specializations[4].i = num_prior;

pipeline_priorbox_mxnet = new Pipeline(vkdev);
pipeline_priorbox_mxnet->set_optimal_local_size_xyz();
pipeline_priorbox_mxnet->create("priorbox_mxnet", specializations, 3, 4);
}

return 0;
}

int PriorBox::destroy_pipeline()
{
delete pipeline_priorbox;
pipeline_priorbox = 0;

delete pipeline_priorbox_mxnet;
pipeline_priorbox_mxnet = 0;

return 0;
}

// Vulkan forward: allocates top_blobs[0] and records the compute dispatch
// that fills it with prior boxes.
//
// bottom_blobs[0] supplies the feature map size (w, h). bottom_blobs[1] is
// only read when image_width / image_height are left at -233, in which case
// its w / h are used as the image size.
//
// Two modes:
//  - mxnet style (_contrib_MultiBoxPrior): exactly one bottom blob, image
//    size unset and no max sizes. Output is a 1-D blob of
//    4 * w * h * num_prior floats.
//  - caffe style: output has two rows of 4 * w * h * num_prior floats
//    (boxes, then variances).
//
// Returns 0 on success, -100 when the top blob could not be created, and -1
// when the image size has to come from a second bottom blob that is missing.
int PriorBox::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
{
    int w = bottom_blobs[0].w;
    int h = bottom_blobs[0].h;

    if (bottom_blobs.size() == 1 && image_width == -233 && image_height == -233 && max_sizes.empty())
    {
        // mxnet style _contrib_MultiBoxPrior
        float step_w = step_width;
        float step_h = step_height;
        if (step_w == -233)
            step_w = 1.f / (float)w;
        if (step_h == -233)
            step_h = 1.f / (float)h;

        int num_sizes = min_sizes.w;
        int num_ratios = aspect_ratios.w;

        int num_prior = num_sizes - 1 + num_ratios;

        VkMat& top_blob = top_blobs[0];
        top_blob.create(4 * w * h * num_prior, 4u, opt.blob_vkallocator, opt.staging_vkallocator);
        if (top_blob.empty())
            return -100;

        // binding order matches priorbox_mxnet.comp
        std::vector<VkMat> bindings(3);
        bindings[0] = top_blob;
        bindings[1] = min_sizes_gpu;
        bindings[2] = aspect_ratios_gpu;

        // push constant order matches priorbox_mxnet.comp
        std::vector<vk_constant_type> constants(4);
        constants[0].i = w;
        constants[1].i = h;
        constants[2].f = step_w;
        constants[3].f = step_h;

        // record
        cmd.record_prepare_compute_barrier(top_blob);

        // one invocation per (size, column, row)
        VkMat dispatcher;
        dispatcher.w = num_sizes;
        dispatcher.h = w;
        dispatcher.c = h;

        cmd.record_pipeline(pipeline_priorbox_mxnet, bindings, constants, dispatcher);

        return 0;
    }

    int image_w = image_width;
    int image_h = image_height;
    if (image_w == -233 || image_h == -233)
    {
        // the image size must be taken from the second bottom blob;
        // refuse to read past the end of bottom_blobs when it is absent
        if (bottom_blobs.size() < 2)
            return -1;

        if (image_w == -233)
            image_w = bottom_blobs[1].w;
        if (image_h == -233)
            image_h = bottom_blobs[1].h;
    }

    float step_w = step_width;
    float step_h = step_height;
    if (step_w == -233)
        step_w = (float)image_w / w;
    if (step_h == -233)
        step_h = (float)image_h / h;

    int num_min_size = min_sizes.w;
    int num_max_size = max_sizes.w;
    int num_aspect_ratio = aspect_ratios.w;

    int num_prior = num_min_size * num_aspect_ratio + num_min_size + num_max_size;
    if (flip)
        num_prior += num_min_size * num_aspect_ratio;

    VkMat& top_blob = top_blobs[0];
    top_blob.create(4 * w * h * num_prior, 2, 4u, opt.blob_vkallocator, opt.staging_vkallocator);
    if (top_blob.empty())
        return -100;

    // fprintf(stderr, "PriorBox::forward %p\n", top_blob.buffer());

    // binding order matches priorbox.comp
    std::vector<VkMat> bindings(4);
    bindings[0] = top_blob;
    bindings[1] = min_sizes_gpu;
    // max_sizes_gpu is never uploaded when there are no max sizes, so bind
    // min_sizes_gpu as a stand-in; the shader does not read it in that case
    bindings[2] = num_max_size > 0 ? max_sizes_gpu : min_sizes_gpu;
    bindings[3] = aspect_ratios_gpu;

    // push constant order matches priorbox.comp
    std::vector<vk_constant_type> constants(6);
    constants[0].i = w;
    constants[1].i = h;
    constants[2].f = image_w;
    constants[3].f = image_h;
    constants[4].f = step_w;
    constants[5].f = step_h;

    // record
    cmd.record_prepare_compute_barrier(top_blob);

    // one invocation per (min size, column, row)
    VkMat dispatcher;
    dispatcher.w = num_min_size;
    dispatcher.h = w;
    dispatcher.c = h;

    cmd.record_pipeline(pipeline_priorbox, bindings, constants, dispatcher);

    return 0;
}
#endif // NCNN_VULKAN

} // namespace ncnn

+ 17
- 0
src/layer/priorbox.h View File

@@ -28,6 +28,15 @@ public:

virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;

#if NCNN_VULKAN
// Record uploads of min_sizes, max_sizes (only when non-empty) and
// aspect_ratios into their *_gpu counterparts.
virtual int upload_model(VkTransfer& cmd);

// Create / delete the two compute pipelines ("priorbox" and "priorbox_mxnet").
virtual int create_pipeline();
virtual int destroy_pipeline();

// Vulkan counterpart of the forward() above: allocates top_blobs[0] and
// records the compute dispatch that fills it.
virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const;
#endif // NCNN_VULKAN

public:
Mat min_sizes;
Mat max_sizes;
@@ -40,6 +49,14 @@ public:
float step_width;
float step_height;
float offset;

#if NCNN_VULKAN
// GPU-side copies of min_sizes / max_sizes / aspect_ratios, filled by upload_model().
// max_sizes_gpu is only uploaded when max_sizes is non-empty.
VkMat min_sizes_gpu;
VkMat max_sizes_gpu;
VkMat aspect_ratios_gpu;
// Compute pipelines owned by this layer (caffe style and mxnet style);
// allocated in create_pipeline() and deleted in destroy_pipeline().
Pipeline* pipeline_priorbox;
Pipeline* pipeline_priorbox_mxnet;
#endif // NCNN_VULKAN
};

} // namespace ncnn


+ 1
- 1
src/layer/shader/permute_pack4to1.comp View File

@@ -61,7 +61,7 @@ void main()
}
if (order_type == 1)
{
v_offset = ivec4(gx * p.outw + gy) + ivec4(0, 1, 2, 3);
v_offset = ivec4(gx * p.outw + gy * 4) + ivec4(0, 1, 2, 3);
}
}
else if (p.dims == 3)


+ 131
- 0
src/layer/shader/priorbox.comp View File

@@ -0,0 +1,131 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Specialization constants. The ids follow the order in which
// PriorBox::create_pipeline() fills its specialization vector for "priorbox".
layout (constant_id = 0) const int flip = 0;
layout (constant_id = 1) const int clip = 0;
layout (constant_id = 2) const float offset = 0;
layout (constant_id = 3) const float variances_0 = 0;
layout (constant_id = 4) const float variances_1 = 0;
layout (constant_id = 5) const float variances_2 = 0;
layout (constant_id = 6) const float variances_3 = 0;
layout (constant_id = 7) const int num_min_size = 0;
layout (constant_id = 8) const int num_max_size = 0;
layout (constant_id = 9) const int num_aspect_ratio = 0;
layout (constant_id = 10) const int num_prior = 0;

// Workgroup size comes from specialization ids 233..235
// (presumably supplied by the host-side Pipeline - confirm).
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

// Output is addressed as vec4, i.e. one element per box / per variance quadruple.
layout (binding = 0) writeonly buffer top_blob { vec4 top_blob_data[]; };
layout (binding = 1) readonly buffer min_sizes { float min_sizes_data[]; };
// The host binds the min sizes buffer here when there are no max sizes;
// main() only reads it when num_max_size > 0.
layout (binding = 2) readonly buffer max_sizes { float max_sizes_data[]; };
layout (binding = 3) readonly buffer aspect_ratios { float aspect_ratios_data[]; };

// Push constants, in the order the host fills them in the Vulkan forward().
layout (push_constant) uniform parameter
{
// feature map width / height
int w;
int h;

float image_w;
float image_h;
float step_w;
float step_h;
} p;

// Caffe-style prior boxes.
// One invocation handles one min size (gx) at one feature map cell
// (column gy, row gz) and writes, in order:
//   the min-size box,
//   the max-size box (only when num_max_size > 0),
//   one box per aspect ratio, each followed by its flipped box when flip == 1.
// Every box is mirrored by a variance quadruple in the second half of the
// output buffer.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    if (gx >= num_min_size || gy >= p.w || gz >= p.h)
        return;

    // Number of consecutive boxes written by a single invocation.
    // The start offset must advance by this stride per min size, otherwise
    // invocations with different gx overwrite each other's boxes whenever
    // num_min_size > 1. With a single min size gx is 0 and the offset is
    // the same as before.
    // NOTE: like the host-side num_prior, this assumes num_max_size is
    // either 0 or equal to num_min_size.
    int boxes_per_min_size = 1 + (num_max_size > 0 ? 1 : 0) + num_aspect_ratio * (flip == 1 ? 2 : 1);

    // anchor and variance
    int v_offset = (gz * p.w + gy) * num_prior + gx * boxes_per_min_size;
    int var_offset = p.w * p.h * num_prior + v_offset;

    float center_x = (gy + offset) * p.step_w;
    float center_y = (gz + offset) * p.step_h;
    vec4 center = vec4(center_x, center_y, center_x, center_y);

    // boxes are normalized by the image size
    vec4 image_norm = 1.f / vec4(p.image_w, p.image_h, p.image_w, p.image_h);

    vec4 variance = vec4(variances_0, variances_1, variances_2, variances_3);

    vec4 box;

    float box_w;
    float box_h;

    float min_size = min_sizes_data[gx];

    // min size box
    box_w = box_h = min_size;

    box = (center + vec4(-box_w, -box_h, box_w, box_h) * 0.5f) * image_norm;

    top_blob_data[v_offset] = clip == 1 ? clamp(box, 0.f, 1.f) : box;
    top_blob_data[var_offset] = variance;

    v_offset += 1;
    var_offset += 1;

    if (num_max_size > 0)
    {
        float max_size = max_sizes_data[gx];

        // max size box
        box_w = box_h = sqrt(min_size * max_size);

        box = (center + vec4(-box_w, -box_h, box_w, box_h) * 0.5f) * image_norm;

        top_blob_data[v_offset] = clip == 1 ? clamp(box, 0.f, 1.f) : box;
        top_blob_data[var_offset] = variance;

        v_offset += 1;
        var_offset += 1;
    }

    // all aspect_ratios
    for (int pi = 0; pi < num_aspect_ratio; pi++)
    {
        float ar = aspect_ratios_data[pi];

        box_w = min_size * sqrt(ar);
        box_h = min_size / sqrt(ar);

        box = (center + vec4(-box_w, -box_h, box_w, box_h) * 0.5f) * image_norm;

        top_blob_data[v_offset] = clip == 1 ? clamp(box, 0.f, 1.f) : box;
        top_blob_data[var_offset] = variance;

        v_offset += 1;
        var_offset += 1;

        if (flip == 1)
        {
            // flipped box: width and height swapped
            box = (center + vec4(-box_h, -box_w, box_h, box_w) * 0.5f) * image_norm;

            top_blob_data[v_offset] = clip == 1 ? clamp(box, 0.f, 1.f) : box;
            top_blob_data[var_offset] = variance;

            v_offset += 1;
            var_offset += 1;
        }
    }
}

+ 82
- 0
src/layer/shader/priorbox_mxnet.comp View File

@@ -0,0 +1,82 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Specialization constants. The ids follow the order in which
// PriorBox::create_pipeline() fills its specialization vector for "priorbox_mxnet".
layout (constant_id = 0) const int clip = 0;
layout (constant_id = 1) const float offset = 0;
layout (constant_id = 2) const int num_sizes = 0;
layout (constant_id = 3) const int num_ratios = 0;
layout (constant_id = 4) const int num_prior = 0;

// Workgroup size comes from specialization ids 233..235
// (presumably supplied by the host-side Pipeline - confirm).
layout (local_size_x_id = 233) in;
layout (local_size_y_id = 234) in;
layout (local_size_z_id = 235) in;

// Output is addressed as vec4, i.e. one element per box.
layout (binding = 0) writeonly buffer top_blob { vec4 top_blob_data[]; };
layout (binding = 1) readonly buffer min_sizes { float min_sizes_data[]; };
layout (binding = 2) readonly buffer aspect_ratios { float aspect_ratios_data[]; };

// Push constants, in the order the host fills them in the Vulkan forward().
layout (push_constant) uniform parameter
{
// feature map width / height
int w;
int h;

float step_w;
float step_h;
} p;

// mxnet style _contrib_MultiBoxPrior.
// One invocation handles one size (x) at one feature map cell (column y,
// row z) and writes the ratio-1 box for that size. The invocation for the
// last size additionally writes the boxes for aspect ratios 1..num_ratios-1,
// all built from the first size.
void main()
{
    ivec3 gid = ivec3(gl_GlobalInvocationID);
    int size_index = gid.x;
    int col = gid.y;
    int row = gid.z;

    bool inside = size_index < num_sizes && col < p.w && row < p.h;
    if (!inside)
        return;

    // first output element of this cell, then the slot of this size
    int cell_base = (row * p.w + col) * num_prior;
    int slot = cell_base + size_index;

    float cx = (col + offset) * p.step_w;
    float cy = (row + offset) * p.step_h;
    vec4 anchor = vec4(cx, cy, cx, cy);

    // ratio = 1, various sizes
    float side = min_sizes_data[size_index];
    float half_w = side * p.h / p.w / 2;
    float half_h = side / 2;

    vec4 size_box = anchor + vec4(-half_w, -half_h, half_w, half_h);
    if (clip == 1)
        size_box = clamp(size_box, 0.f, 1.f);

    top_blob_data[slot] = size_box;

    // only the invocation of the last size emits the ratio boxes
    if (size_index != num_sizes - 1)
        return;

    // various ratios, size = min_size = size[0]
    float base_side = min_sizes_data[0];
    for (int ri = 1; ri < num_ratios; ri++)
    {
        float root = sqrt(aspect_ratios_data[ri]);
        float half_wr = base_side * p.h / p.w * root / 2;
        float half_hr = base_side / root / 2;

        vec4 ratio_box = anchor + vec4(-half_wr, -half_hr, half_wr, half_hr);
        if (clip == 1)
            ratio_box = clamp(ratio_box, 0.f, 1.f);

        top_blob_data[slot + ri] = ratio_box;
    }
}

+ 1
- 0
src/mat.h View File

@@ -1421,6 +1421,7 @@ inline void VkMat::discard_staging_buffer()
}

staging_data = 0;
staging_refcount = 0;
}

inline void VkMat::upload(const Mat& m)


Loading…
Cancel
Save