| @@ -24,6 +24,12 @@ PriorBox::PriorBox() | |||
| { | |||
| one_blob_only = false; | |||
| support_inplace = false; | |||
| support_vulkan = true; | |||
| #if NCNN_VULKAN | |||
| pipeline_priorbox = 0; | |||
| pipeline_priorbox_mxnet = 0; | |||
| #endif // NCNN_VULKAN | |||
| } | |||
| int PriorBox::load_param(const ParamDict& pd) | |||
| @@ -250,4 +256,186 @@ int PriorBox::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to | |||
| return 0; | |||
| } | |||
| #if NCNN_VULKAN | |||
| // Record the uploads of the anchor tables into their GPU-side copies, which forward() later | |||
| // binds as shader buffers. Always returns 0. | |||
| int PriorBox::upload_model(VkTransfer& cmd) | |||
| { | |||
| // max_sizes is optional; nothing is uploaded for it when it holds no elements | |||
| const bool has_max_sizes = max_sizes.w > 0; | |||
| cmd.record_upload(min_sizes, min_sizes_gpu); | |||
| if (has_max_sizes) | |||
| { | |||
| cmd.record_upload(max_sizes, max_sizes_gpu); | |||
| } | |||
| cmd.record_upload(aspect_ratios, aspect_ratios_gpu); | |||
| return 0; | |||
| } | |||
| // Create both compute pipelines (caffe style and mxnet style); forward() selects one per call. | |||
| // The layer parameters are passed to the shaders as specialization constants. Always returns 0. | |||
| int PriorBox::create_pipeline() | |||
| { | |||
| // caffe style pipeline | |||
| { | |||
| const int min_size_count = min_sizes.w; | |||
| const int max_size_count = max_sizes.w; | |||
| const int aspect_ratio_count = aspect_ratios.w; | |||
| // enabling flip doubles the number of aspect-ratio boxes | |||
| const int aspect_ratio_boxes = flip ? 2 * (min_size_count * aspect_ratio_count) : min_size_count * aspect_ratio_count; | |||
| const int prior_count = aspect_ratio_boxes + min_size_count + max_size_count; | |||
| // slot order must match the constant_id order declared in the priorbox shader | |||
| std::vector<vk_specialization_type> caffe_constants(11); | |||
| caffe_constants[0].i = flip; | |||
| caffe_constants[1].i = clip; | |||
| caffe_constants[2].f = offset; | |||
| caffe_constants[3].f = variances[0]; | |||
| caffe_constants[4].f = variances[1]; | |||
| caffe_constants[5].f = variances[2]; | |||
| caffe_constants[6].f = variances[3]; | |||
| caffe_constants[7].i = min_size_count; | |||
| caffe_constants[8].i = max_size_count; | |||
| caffe_constants[9].i = aspect_ratio_count; | |||
| caffe_constants[10].i = prior_count; | |||
| // 4 and 6 match the binding and push constant counts recorded in forward() | |||
| pipeline_priorbox = new Pipeline(vkdev); | |||
| pipeline_priorbox->set_optimal_local_size_xyz(); | |||
| pipeline_priorbox->create("priorbox", caffe_constants, 4, 6); | |||
| } | |||
| // mxnet style pipeline | |||
| { | |||
| const int size_count = min_sizes.w; | |||
| const int ratio_count = aspect_ratios.w; | |||
| const int prior_count = size_count - 1 + ratio_count; | |||
| // slot order must match the constant_id order declared in the priorbox_mxnet shader | |||
| std::vector<vk_specialization_type> mxnet_constants(5); | |||
| mxnet_constants[0].i = clip; | |||
| mxnet_constants[1].f = offset; | |||
| mxnet_constants[2].i = size_count; | |||
| mxnet_constants[3].i = ratio_count; | |||
| mxnet_constants[4].i = prior_count; | |||
| // 3 and 4 match the binding and push constant counts recorded in forward() | |||
| pipeline_priorbox_mxnet = new Pipeline(vkdev); | |||
| pipeline_priorbox_mxnet->set_optimal_local_size_xyz(); | |||
| pipeline_priorbox_mxnet->create("priorbox_mxnet", mxnet_constants, 3, 4); | |||
| } | |||
| return 0; | |||
| } | |||
| // Free both pipelines allocated by create_pipeline() and reset the pointers. Always returns 0. | |||
| int PriorBox::destroy_pipeline() | |||
| { | |||
| // deleting a null pointer is a no-op, so this is safe when the pipelines were never created | |||
| delete pipeline_priorbox; | |||
| delete pipeline_priorbox_mxnet; | |||
| // clear the pointers so a repeated call does not free them twice | |||
| pipeline_priorbox = 0; | |||
| pipeline_priorbox_mxnet = 0; | |||
| return 0; | |||
| } | |||
| // Vulkan forward pass: records the compute dispatch that fills top_blobs[0] with prior boxes. | |||
| // bottom_blobs[0] supplies the feature map size (w, h). bottom_blobs[1] supplies the image size | |||
| // when image_width / image_height are left at -233. | |||
| // Returns 0 on success, -100 when the output blob cannot be allocated, and -1 when the image | |||
| // size has to come from bottom_blobs[1] but that blob was not supplied. | |||
| int PriorBox::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const | |||
| { | |||
| int w = bottom_blobs[0].w; | |||
| int h = bottom_blobs[0].h; | |||
| if (bottom_blobs.size() == 1 && image_width == -233 && image_height == -233 && max_sizes.empty()) | |||
| { | |||
| // mxnet style _contrib_MultiBoxPrior | |||
| // steps left at -233 default to one feature map cell in normalized coordinates | |||
| float step_w = step_width; | |||
| float step_h = step_height; | |||
| if (step_w == -233) | |||
| step_w = 1.f / (float)w; | |||
| if (step_h == -233) | |||
| step_h = 1.f / (float)h; | |||
| int num_sizes = min_sizes.w; | |||
| int num_ratios = aspect_ratios.w; | |||
| int num_prior = num_sizes - 1 + num_ratios; | |||
| // flat output: 4 floats per prior, num_prior priors per feature map cell | |||
| VkMat& top_blob = top_blobs[0]; | |||
| top_blob.create(4 * w * h * num_prior, 4u, opt.blob_vkallocator, opt.staging_vkallocator); | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| std::vector<VkMat> bindings(3); | |||
| bindings[0] = top_blob; | |||
| bindings[1] = min_sizes_gpu; | |||
| bindings[2] = aspect_ratios_gpu; | |||
| std::vector<vk_constant_type> constants(4); | |||
| constants[0].i = w; | |||
| constants[1].i = h; | |||
| constants[2].f = step_w; | |||
| constants[3].f = step_h; | |||
| // record | |||
| cmd.record_prepare_compute_barrier(top_blob); | |||
| // one shader invocation per (size, x, y) | |||
| VkMat dispatcher; | |||
| dispatcher.w = num_sizes; | |||
| dispatcher.h = w; | |||
| dispatcher.c = h; | |||
| cmd.record_pipeline(pipeline_priorbox_mxnet, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| // caffe style | |||
| // the image size falls back to the second bottom blob; fail cleanly instead of indexing | |||
| // past the end of bottom_blobs when that blob is missing | |||
| if ((image_width == -233 || image_height == -233) && bottom_blobs.size() < 2) | |||
| { | |||
| return -1; | |||
| } | |||
| int image_w = image_width; | |||
| int image_h = image_height; | |||
| if (image_w == -233) | |||
| image_w = bottom_blobs[1].w; | |||
| if (image_h == -233) | |||
| image_h = bottom_blobs[1].h; | |||
| // steps left at -233 default to the image size divided by the feature map size | |||
| float step_w = step_width; | |||
| float step_h = step_height; | |||
| if (step_w == -233) | |||
| step_w = (float)image_w / w; | |||
| if (step_h == -233) | |||
| step_h = (float)image_h / h; | |||
| int num_min_size = min_sizes.w; | |||
| int num_max_size = max_sizes.w; | |||
| int num_aspect_ratio = aspect_ratios.w; | |||
| int num_prior = num_min_size * num_aspect_ratio + num_min_size + num_max_size; | |||
| if (flip) | |||
| num_prior += num_min_size * num_aspect_ratio; | |||
| // two rows: the shader writes boxes into the first and variances into the second | |||
| VkMat& top_blob = top_blobs[0]; | |||
| top_blob.create(4 * w * h * num_prior, 2, 4u, opt.blob_vkallocator, opt.staging_vkallocator); | |||
| if (top_blob.empty()) | |||
| return -100; | |||
| // fprintf(stderr, "PriorBox::forward %p\n", top_blob.buffer()); | |||
| std::vector<VkMat> bindings(4); | |||
| bindings[0] = top_blob; | |||
| bindings[1] = min_sizes_gpu; | |||
| // max_sizes_gpu is never uploaded when there are no max sizes, so bind min_sizes_gpu | |||
| // as a stand-in; the shader only reads this binding when num_max_size > 0 | |||
| bindings[2] = num_max_size > 0 ? max_sizes_gpu : min_sizes_gpu; | |||
| bindings[3] = aspect_ratios_gpu; | |||
| std::vector<vk_constant_type> constants(6); | |||
| constants[0].i = w; | |||
| constants[1].i = h; | |||
| constants[2].f = image_w; | |||
| constants[3].f = image_h; | |||
| constants[4].f = step_w; | |||
| constants[5].f = step_h; | |||
| // record | |||
| cmd.record_prepare_compute_barrier(top_blob); | |||
| // one shader invocation per (min size, x, y) | |||
| VkMat dispatcher; | |||
| dispatcher.w = num_min_size; | |||
| dispatcher.h = w; | |||
| dispatcher.c = h; | |||
| cmd.record_pipeline(pipeline_priorbox, bindings, constants, dispatcher); | |||
| return 0; | |||
| } | |||
| #endif // NCNN_VULKAN | |||
| } // namespace ncnn | |||
| @@ -28,6 +28,15 @@ public: | |||
| virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const; | |||
| #if NCNN_VULKAN | |||
| virtual int upload_model(VkTransfer& cmd); | |||
| virtual int create_pipeline(); | |||
| virtual int destroy_pipeline(); | |||
| virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const; | |||
| #endif // NCNN_VULKAN | |||
| public: | |||
| Mat min_sizes; | |||
| Mat max_sizes; | |||
| @@ -40,6 +49,14 @@ public: | |||
| float step_width; | |||
| float step_height; | |||
| float offset; | |||
| #if NCNN_VULKAN | |||
| VkMat min_sizes_gpu; | |||
| VkMat max_sizes_gpu; | |||
| VkMat aspect_ratios_gpu; | |||
| Pipeline* pipeline_priorbox; | |||
| Pipeline* pipeline_priorbox_mxnet; | |||
| #endif // NCNN_VULKAN | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -61,7 +61,7 @@ void main() | |||
| } | |||
| if (order_type == 1) | |||
| { | |||
| v_offset = ivec4(gx * p.outw + gy) + ivec4(0, 1, 2, 3); | |||
| v_offset = ivec4(gx * p.outw + gy * 4) + ivec4(0, 1, 2, 3); | |||
| } | |||
| } | |||
| else if (p.dims == 3) | |||
| @@ -0,0 +1,131 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| #version 450 | |||
| // Caffe style prior box generator. Each invocation handles one (min size, x, y) triple and | |||
| // writes every box derived from that min size, together with the matching variance entries. | |||
| // Specialization constants: layer parameters, in the same order in which | |||
| // PriorBox::create_pipeline fills its specialization vector. | |||
| layout (constant_id = 0) const int flip = 0; | |||
| layout (constant_id = 1) const int clip = 0; | |||
| layout (constant_id = 2) const float offset = 0; | |||
| layout (constant_id = 3) const float variances_0 = 0; | |||
| layout (constant_id = 4) const float variances_1 = 0; | |||
| layout (constant_id = 5) const float variances_2 = 0; | |||
| layout (constant_id = 6) const float variances_3 = 0; | |||
| layout (constant_id = 7) const int num_min_size = 0; | |||
| layout (constant_id = 8) const int num_max_size = 0; | |||
| layout (constant_id = 9) const int num_aspect_ratio = 0; | |||
| layout (constant_id = 10) const int num_prior = 0; | |||
| // workgroup size is supplied through specialization ids 233-235 | |||
| layout (local_size_x_id = 233) in; | |||
| layout (local_size_y_id = 234) in; | |||
| layout (local_size_z_id = 235) in; | |||
| // binding 0 is the output (one vec4 per box or variance); bindings 1-3 are the anchor tables | |||
| layout (binding = 0) writeonly buffer top_blob { vec4 top_blob_data[]; }; | |||
| layout (binding = 1) readonly buffer min_sizes { float min_sizes_data[]; }; | |||
| layout (binding = 2) readonly buffer max_sizes { float max_sizes_data[]; }; | |||
| layout (binding = 3) readonly buffer aspect_ratios { float aspect_ratios_data[]; }; | |||
| // per-dispatch values: feature map size, image size and step between box centers | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int w; | |||
| int h; | |||
| float image_w; | |||
| float image_h; | |||
| float step_w; | |||
| float step_h; | |||
| } p; | |||
| void main() | |||
| { | |||
| // gx = min size index, gy = feature map column, gz = feature map row | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= num_min_size || gy >= p.w || gz >= p.h) | |||
| return; | |||
| // anchor and variance | |||
| // boxes written below per min size: the min-size box, the optional max-size box, | |||
| // then one box per aspect ratio (two when flip is enabled) | |||
| int priors_per_min_size = 1 + (num_max_size > 0 ? 1 : 0) + num_aspect_ratio * (flip == 1 ? 2 : 1); | |||
| // every min size owns its own run of slots inside the cell, so invocations with | |||
| // different gx never write to the same output element | |||
| int v_offset = (gz * p.w + gy) * num_prior + gx * priors_per_min_size; | |||
| // variances are stored after all p.w * p.h * num_prior boxes | |||
| int var_offset = p.w * p.h * num_prior + v_offset; | |||
| float center_x = (gy + offset) * p.step_w; | |||
| float center_y = (gz + offset) * p.step_h; | |||
| vec4 center = vec4(center_x, center_y, center_x, center_y); | |||
| // multiplying by image_norm converts pixel coordinates into image-relative ones | |||
| vec4 image_norm = 1.f / vec4(p.image_w, p.image_h, p.image_w, p.image_h); | |||
| vec4 variance = vec4(variances_0, variances_1, variances_2, variances_3); | |||
| vec4 box; | |||
| float box_w; | |||
| float box_h; | |||
| float min_size = min_sizes_data[gx]; | |||
| // min size box | |||
| box_w = box_h = min_size; | |||
| box = (center + vec4(-box_w, -box_h, box_w, box_h) * 0.5f) * image_norm; | |||
| top_blob_data[v_offset] = clip == 1 ? clamp(box, 0.f, 1.f) : box; | |||
| top_blob_data[var_offset] = variance; | |||
| v_offset += 1; | |||
| var_offset += 1; | |||
| if (num_max_size > 0) | |||
| { | |||
| float max_size = max_sizes_data[gx]; | |||
| // max size box | |||
| box_w = box_h = sqrt(min_size * max_size); | |||
| box = (center + vec4(-box_w, -box_h, box_w, box_h) * 0.5f) * image_norm; | |||
| top_blob_data[v_offset] = clip == 1 ? clamp(box, 0.f, 1.f) : box; | |||
| top_blob_data[var_offset] = variance; | |||
| v_offset += 1; | |||
| var_offset += 1; | |||
| } | |||
| // all aspect_ratios | |||
| for (int pi = 0; pi < num_aspect_ratio; pi++) | |||
| { | |||
| float ar = aspect_ratios_data[pi]; | |||
| box_w = min_size * sqrt(ar); | |||
| box_h = min_size / sqrt(ar); | |||
| box = (center + vec4(-box_w, -box_h, box_w, box_h) * 0.5f) * image_norm; | |||
| top_blob_data[v_offset] = clip == 1 ? clamp(box, 0.f, 1.f) : box; | |||
| top_blob_data[var_offset] = variance; | |||
| v_offset += 1; | |||
| var_offset += 1; | |||
| if (flip == 1) | |||
| { | |||
| // flipped box: width and height swapped | |||
| box = (center + vec4(-box_h, -box_w, box_h, box_w) * 0.5f) * image_norm; | |||
| top_blob_data[v_offset] = clip == 1 ? clamp(box, 0.f, 1.f) : box; | |||
| top_blob_data[var_offset] = variance; | |||
| v_offset += 1; | |||
| var_offset += 1; | |||
| } | |||
| } | |||
| } | |||
| @@ -0,0 +1,82 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| #version 450 | |||
| // MXNet style prior box generator (_contrib_MultiBoxPrior). Each invocation handles one | |||
| // (size, x, y) triple; the invocation for the last size also writes the aspect-ratio boxes. | |||
| // Specialization constants, in the same order in which PriorBox::create_pipeline fills them. | |||
| layout (constant_id = 0) const int clip = 0; | |||
| layout (constant_id = 1) const float offset = 0; | |||
| layout (constant_id = 2) const int num_sizes = 0; | |||
| layout (constant_id = 3) const int num_ratios = 0; | |||
| layout (constant_id = 4) const int num_prior = 0; | |||
| // workgroup size is supplied through specialization ids 233-235 | |||
| layout (local_size_x_id = 233) in; | |||
| layout (local_size_y_id = 234) in; | |||
| layout (local_size_z_id = 235) in; | |||
| // binding 0 is the output (one vec4 per box); bindings 1-2 are the size and ratio tables | |||
| layout (binding = 0) writeonly buffer top_blob { vec4 top_blob_data[]; }; | |||
| layout (binding = 1) readonly buffer min_sizes { float min_sizes_data[]; }; | |||
| layout (binding = 2) readonly buffer aspect_ratios { float aspect_ratios_data[]; }; | |||
| // per-dispatch values: feature map size and step between box centers | |||
| layout (push_constant) uniform parameter | |||
| { | |||
| int w; | |||
| int h; | |||
| float step_w; | |||
| float step_h; | |||
| } p; | |||
| void main() | |||
| { | |||
| // gx = size index, gy = feature map column, gz = feature map row | |||
| int gx = int(gl_GlobalInvocationID.x); | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= num_sizes || gy >= p.w || gz >= p.h) | |||
| return; | |||
| // first output slot of this invocation inside its feature map cell | |||
| int v_offset = (gz * p.w + gy) * num_prior + gx; | |||
| float center_x = (gy + offset) * p.step_w; | |||
| float center_y = (gz + offset) * p.step_h; | |||
| vec4 center = vec4(center_x, center_y, center_x, center_y); | |||
| // ratio = 1 box for this invocation's size | |||
| float this_size = min_sizes_data[gx]; | |||
| float half_w = this_size * p.h / p.w / 2; | |||
| float half_h = this_size / 2; | |||
| vec4 size_box = center + vec4(-half_w, -half_h, half_w, half_h); | |||
| if (clip == 1) | |||
| { | |||
| size_box = clamp(size_box, 0.f, 1.f); | |||
| } | |||
| top_blob_data[v_offset] = size_box; | |||
| // only the invocation for the last size goes on to emit the ratio boxes | |||
| if (gx != num_sizes - 1) | |||
| return; | |||
| // ratio boxes always use the first size; ratio index 0 is skipped | |||
| float first_size = min_sizes_data[0]; | |||
| for (int ri = 1; ri < num_ratios; ri++) | |||
| { | |||
| float ratio = sqrt(aspect_ratios_data[ri]); | |||
| float half_wr = first_size * p.h / p.w * ratio / 2; | |||
| float half_hr = first_size / ratio / 2; | |||
| vec4 ratio_box = center + vec4(-half_wr, -half_hr, half_wr, half_hr); | |||
| if (clip == 1) | |||
| { | |||
| ratio_box = clamp(ratio_box, 0.f, 1.f); | |||
| } | |||
| top_blob_data[v_offset + ri] = ratio_box; | |||
| } | |||
| } | |||
| @@ -1421,6 +1421,7 @@ inline void VkMat::discard_staging_buffer() | |||
| } | |||
| staging_data = 0; | |||
| staging_refcount = 0; | |||
| } | |||
| inline void VkMat::upload(const Mat& m) | |||