| @@ -25,6 +25,8 @@ Crop_vulkan::Crop_vulkan() | |||
| pipeline_crop = 0; | |||
| pipeline_crop_pack4 = 0; | |||
| pipeline_crop_pack1to4 = 0; | |||
| pipeline_crop_pack4to1 = 0; | |||
| } | |||
| int Crop_vulkan::create_pipeline(const Option& opt) | |||
| @@ -45,6 +47,20 @@ int Crop_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_crop_pack4->create("crop_pack4", opt, specializations, 2, 13); | |||
| } | |||
| // pack1to4 | |||
| { | |||
| pipeline_crop_pack1to4 = new Pipeline(vkdev); | |||
| pipeline_crop_pack1to4->set_optimal_local_size_xyz(); | |||
| pipeline_crop_pack1to4->create("crop_pack1to4", opt, specializations, 2, 13); | |||
| } | |||
| // pack4to1 | |||
| { | |||
| pipeline_crop_pack4to1 = new Pipeline(vkdev); | |||
| pipeline_crop_pack4to1->set_optimal_local_size_xyz(); | |||
| pipeline_crop_pack4to1->create("crop_pack4to1", opt, specializations, 2, 13); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -56,6 +72,12 @@ int Crop_vulkan::destroy_pipeline(const Option& opt) | |||
| delete pipeline_crop_pack4; | |||
| pipeline_crop_pack4 = 0; | |||
| delete pipeline_crop_pack1to4; | |||
| pipeline_crop_pack1to4 = 0; | |||
| delete pipeline_crop_pack4to1; | |||
| pipeline_crop_pack4to1 = 0; | |||
| return 0; | |||
| } | |||
| @@ -167,19 +189,17 @@ int Crop_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& c | |||
| } | |||
| else if (elempack == 4 && out_elempack == 4) | |||
| { | |||
| constants[12].i = _coffset / 4; | |||
| constants[12].i = _coffset / 4;// TODO pack4to1to4 | |||
| pipeline = pipeline_crop_pack4; | |||
| } | |||
| else if (elempack == 1 && out_elempack == 4) | |||
| { | |||
| // TODO | |||
| return -1; | |||
| pipeline = pipeline_crop_pack1to4; | |||
| } | |||
| else if (elempack == 4 && out_elempack == 1) | |||
| { | |||
| // TODO | |||
| return -1; | |||
| pipeline = pipeline_crop_pack4to1; | |||
| } | |||
| cmd.record_pipeline(pipeline, bindings, constants, top_blob); | |||
| @@ -266,19 +286,17 @@ int Crop_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkM | |||
| } | |||
| else if (elempack == 4 && out_elempack == 4) | |||
| { | |||
| constants[12].i = _coffset / 4; | |||
| constants[12].i = _coffset / 4;// TODO pack4to1to4 | |||
| pipeline = pipeline_crop_pack4; | |||
| } | |||
| else if (elempack == 1 && out_elempack == 4) | |||
| { | |||
| // TODO | |||
| return -1; | |||
| pipeline = pipeline_crop_pack1to4; | |||
| } | |||
| else if (elempack == 4 && out_elempack == 1) | |||
| { | |||
| // TODO | |||
| return -1; | |||
| pipeline = pipeline_crop_pack4to1; | |||
| } | |||
| cmd.record_pipeline(pipeline, bindings, constants, top_blob); | |||
| @@ -34,6 +34,8 @@ public: | |||
| public: | |||
| Pipeline* pipeline_crop; | |||
| Pipeline* pipeline_crop_pack4; | |||
| Pipeline* pipeline_crop_pack1to4; | |||
| Pipeline* pipeline_crop_pack4to1; | |||
| }; | |||
| } // namespace ncnn | |||
| @@ -0,0 +1,77 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| #version 450 | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_AMD_gpu_shader_half_float: require | |||
| #endif | |||
| // Workgroup size is supplied through specialization constants 233 (x), 234 (y) and 235 (z). | |||
| layout (local_size_x_id = 233, local_size_y_id = 234, local_size_z_id = 235) in; | |||
| // binding 0: source blob, read as one scalar (sfp) per array element. | |||
| layout (binding = 0) readonly buffer bottom_blob | |||
| { | |||
|     sfp bottom_blob_data[]; | |||
| }; | |||
| // binding 1: destination blob, written as one 4-lane vector (sfpvec4) per array element. | |||
| layout (binding = 1) writeonly buffer top_blob | |||
| { | |||
|     sfpvec4 top_blob_data[]; | |||
| }; | |||
| // Push constants: 13 ints. The member order defines the block layout, so it must stay as is. | |||
| layout (push_constant) uniform parameter | |||
| { | |||
|     // source blob: main() uses w and cstep for addressing; dims, h and c are not read by main() | |||
|     int dims, w, h, c, cstep; | |||
|     // destination blob: main() uses outw, outh, outc and outcstep; outdims is not read by main() | |||
|     int outdims, outw, outh, outc, outcstep; | |||
|     // crop origin, added by main() to the destination x / y / channel index | |||
|     int woffset, hoffset, coffset; | |||
| } p; | |||
| // Crop from a scalar-per-element source into a 4-lane-per-element destination: | |||
| // each invocation fills one destination element from four consecutive source channels. | |||
| void main() | |||
| { | |||
|     int ox = int(gl_GlobalInvocationID.x); | |||
|     int oy = int(gl_GlobalInvocationID.y); | |||
|     int oz = int(gl_GlobalInvocationID.z); | |||
|     // invocations outside the destination extent have nothing to write | |||
|     if (!(ox < p.outw && oy < p.outh && oz < p.outc)) | |||
|         return; | |||
|     // offset inside one source channel, shifted by the crop origin | |||
|     int plane_pos = (oy + p.hoffset) * p.w + (ox + p.woffset); | |||
|     // destination channel oz gathers source channels 4*oz+0 .. 4*oz+3, shifted by coffset | |||
|     ivec4 src_ch = ivec4(0, 1, 2, 3) + (oz * 4 + p.coffset); | |||
|     ivec4 src_idx = src_ch * p.cstep + plane_pos; | |||
|     int dst_idx = oz * p.outcstep + oy * p.outw + ox; | |||
| #if NCNN_fp16_packed | |||
|     // packed storage: the four values are stored as two packHalf2x16 words | |||
|     vec2 lo = vec2(bottom_blob_data[src_idx.x], bottom_blob_data[src_idx.y]); | |||
|     vec2 hi = vec2(bottom_blob_data[src_idx.z], bottom_blob_data[src_idx.w]); | |||
|     top_blob_data[dst_idx] = uvec2(packHalf2x16(lo), packHalf2x16(hi)); | |||
| #else | |||
|     // lane-by-lane copy in the storage type | |||
|     top_blob_data[dst_idx].x = bottom_blob_data[src_idx.x]; | |||
|     top_blob_data[dst_idx].y = bottom_blob_data[src_idx.y]; | |||
|     top_blob_data[dst_idx].z = bottom_blob_data[src_idx.z]; | |||
|     top_blob_data[dst_idx].w = bottom_blob_data[src_idx.w]; | |||
| #endif | |||
| } | |||
| @@ -0,0 +1,82 @@ | |||
| // Tencent is pleased to support the open source community by making ncnn available. | |||
| // | |||
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |||
| // | |||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||
| // in compliance with the License. You may obtain a copy of the License at | |||
| // | |||
| // https://opensource.org/licenses/BSD-3-Clause | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software distributed | |||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
| // specific language governing permissions and limitations under the License. | |||
| #version 450 | |||
| #if NCNN_fp16_storage | |||
| #extension GL_EXT_shader_16bit_storage: require | |||
| #endif | |||
| #if NCNN_fp16_arithmetic | |||
| #extension GL_AMD_gpu_shader_half_float: require | |||
| #endif | |||
| // Workgroup size is supplied through specialization constants 233 (x), 234 (y) and 235 (z). | |||
| layout (local_size_x_id = 233, local_size_y_id = 234, local_size_z_id = 235) in; | |||
| // binding 0: source blob, read as one 4-lane vector (sfpvec4) per array element. | |||
| layout (binding = 0) readonly buffer bottom_blob | |||
| { | |||
|     sfpvec4 bottom_blob_data[]; | |||
| }; | |||
| // binding 1: destination blob, written as one scalar (sfp) per array element. | |||
| layout (binding = 1) writeonly buffer top_blob | |||
| { | |||
|     sfp top_blob_data[]; | |||
| }; | |||
| // Push constants: 13 ints. The member order defines the block layout, so it must stay as is. | |||
| layout (push_constant) uniform parameter | |||
| { | |||
|     // source blob: main() uses w and cstep for addressing; dims, h and c are not read by main() | |||
|     int dims, w, h, c, cstep; | |||
|     // destination blob: main() uses outw, outh, outc and outcstep; outdims is not read by main() | |||
|     int outdims, outw, outh, outc, outcstep; | |||
|     // crop origin, added by main() to the destination x / y / channel index | |||
|     int woffset, hoffset, coffset; | |||
| } p; | |||
| // Crop from a 4-lane-per-element source into a scalar-per-element destination: | |||
| // each invocation copies one lane of one source element to one destination element. | |||
| void main() | |||
| { | |||
|     int ox = int(gl_GlobalInvocationID.x); | |||
|     int oy = int(gl_GlobalInvocationID.y); | |||
|     int oz = int(gl_GlobalInvocationID.z); | |||
|     // invocations outside the destination extent have nothing to write | |||
|     if (!(ox < p.outw && oy < p.outh && oz < p.outc)) | |||
|         return; | |||
|     // source channel after applying the crop origin; it lives in element src_ch / 4, lane src_ch % 4 | |||
|     int src_ch = oz + p.coffset; | |||
|     int src_idx = (src_ch / 4) * p.cstep + (oy + p.hoffset) * p.w + (ox + p.woffset); | |||
|     int dst_idx = oz * p.outcstep + oy * p.outw + ox; | |||
| #if NCNN_fp16_packed | |||
|     // packed storage: expand the whole element first, then pick the lane | |||
|     vec4 v = sfp2afpvec4(bottom_blob_data[src_idx]); | |||
|     switch (src_ch % 4) | |||
|     { | |||
|     case 0: | |||
|         top_blob_data[dst_idx] = v.x; | |||
|         break; | |||
|     case 1: | |||
|         top_blob_data[dst_idx] = v.y; | |||
|         break; | |||
|     case 2: | |||
|         top_blob_data[dst_idx] = v.z; | |||
|         break; | |||
|     default: // every remaining value selects the last lane | |||
|         top_blob_data[dst_idx] = v.w; | |||
|         break; | |||
|     } | |||
| #else | |||
|     // copy the selected lane directly in the storage type | |||
|     switch (src_ch % 4) | |||
|     { | |||
|     case 0: | |||
|         top_blob_data[dst_idx] = bottom_blob_data[src_idx].x; | |||
|         break; | |||
|     case 1: | |||
|         top_blob_data[dst_idx] = bottom_blob_data[src_idx].y; | |||
|         break; | |||
|     case 2: | |||
|         top_blob_data[dst_idx] = bottom_blob_data[src_idx].z; | |||
|         break; | |||
|     default: // every remaining value selects the last lane | |||
|         top_blob_data[dst_idx] = bottom_blob_data[src_idx].w; | |||
|         break; | |||
|     } | |||
| #endif | |||
| } | |||