From cf3cf83cd349e4cbc0421b9ae75d35856938c33a Mon Sep 17 00:00:00 2001 From: nihui Date: Wed, 21 Oct 2020 17:16:21 +0800 Subject: [PATCH] unified image shader storage type (#2231) * drop bug_layout_binding_id_alias flag --- src/allocator.cpp | 128 +++------------- src/allocator.h | 18 +-- src/gpu.cpp | 53 +------ src/gpu.h | 1 - src/layer/vulkan/shader/absval.comp | 31 +--- src/layer/vulkan/shader/absval_pack4.comp | 31 +--- src/layer/vulkan/shader/absval_pack8.comp | 31 +--- src/layer/vulkan/shader/batchnorm.comp | 39 +---- src/layer/vulkan/shader/batchnorm_pack4.comp | 39 +---- src/layer/vulkan/shader/batchnorm_pack8.comp | 39 +---- src/layer/vulkan/shader/binaryop.comp | 62 +------- .../vulkan/shader/binaryop_broadcast.comp | 133 ++++++++--------- .../shader/binaryop_broadcast_a1_pack4.comp | 34 +---- .../shader/binaryop_broadcast_a1_pack8.comp | 34 +---- .../shader/binaryop_broadcast_b1_pack4.comp | 34 +---- .../shader/binaryop_broadcast_b1_pack8.comp | 34 +---- .../shader/binaryop_broadcast_pack4.comp | 133 ++++++++--------- .../shader/binaryop_broadcast_pack8.comp | 133 ++++++++--------- src/layer/vulkan/shader/binaryop_pack4.comp | 62 +------- src/layer/vulkan/shader/binaryop_pack8.comp | 62 +------- .../vulkan/shader/cast_fp16_to_fp32.comp | 17 +-- .../shader/cast_fp16_to_fp32_pack4.comp | 17 +-- .../shader/cast_fp16_to_fp32_pack8.comp | 17 +-- .../vulkan/shader/cast_fp32_to_fp16.comp | 17 +-- .../shader/cast_fp32_to_fp16_pack4.comp | 17 +-- .../shader/cast_fp32_to_fp16_pack8.comp | 17 +-- src/layer/vulkan/shader/clip.comp | 31 +--- src/layer/vulkan/shader/clip_pack4.comp | 31 +--- src/layer/vulkan/shader/clip_pack8.comp | 31 +--- src/layer/vulkan/shader/concat.comp | 28 +--- src/layer/vulkan/shader/concat_pack4.comp | 28 +--- src/layer/vulkan/shader/concat_pack4to1.comp | 82 ++++------- src/layer/vulkan/shader/concat_pack8.comp | 28 +--- src/layer/vulkan/shader/concat_pack8to1.comp | 130 +++++++---------- src/layer/vulkan/shader/concat_pack8to4.comp | 58 
+++----- src/layer/vulkan/shader/convolution.comp | 4 +- .../vulkan/shader/convolution_1x1s1d1.comp | 4 +- .../vulkan/shader/convolution_pack1to4.comp | 4 +- .../vulkan/shader/convolution_pack1to8.comp | 4 +- .../vulkan/shader/convolution_pack4.comp | 4 +- .../shader/convolution_pack4_1x1s1d1.comp | 4 +- ...4_3x3s1d1_winograd23_transform_output.comp | 4 +- .../vulkan/shader/convolution_pack4to1.comp | 4 +- .../vulkan/shader/convolution_pack4to8.comp | 4 +- .../vulkan/shader/convolution_pack8.comp | 4 +- .../shader/convolution_pack8_1x1s1d1.comp | 4 +- ...8_3x3s1d1_winograd23_transform_output.comp | 4 +- .../vulkan/shader/convolution_pack8to1.comp | 4 +- .../vulkan/shader/convolution_pack8to4.comp | 4 +- .../vulkan/shader/convolutiondepthwise.comp | 8 +- .../shader/convolutiondepthwise_group.comp | 4 +- .../convolutiondepthwise_group_pack1to4.comp | 4 +- .../convolutiondepthwise_group_pack1to8.comp | 4 +- .../convolutiondepthwise_group_pack4.comp | 4 +- .../convolutiondepthwise_group_pack4to1.comp | 4 +- .../convolutiondepthwise_group_pack4to8.comp | 4 +- .../convolutiondepthwise_group_pack8.comp | 4 +- .../convolutiondepthwise_group_pack8to1.comp | 4 +- .../convolutiondepthwise_group_pack8to4.comp | 4 +- .../shader/convolutiondepthwise_pack4.comp | 8 +- .../shader/convolutiondepthwise_pack8.comp | 8 +- src/layer/vulkan/shader/deconvolution.comp | 4 +- .../vulkan/shader/deconvolution_pack1to4.comp | 4 +- .../vulkan/shader/deconvolution_pack1to8.comp | 4 +- .../vulkan/shader/deconvolution_pack4.comp | 4 +- .../vulkan/shader/deconvolution_pack4to1.comp | 4 +- .../vulkan/shader/deconvolution_pack4to8.comp | 4 +- .../vulkan/shader/deconvolution_pack8.comp | 4 +- .../vulkan/shader/deconvolution_pack8to1.comp | 4 +- .../vulkan/shader/deconvolution_pack8to4.comp | 4 +- .../vulkan/shader/deconvolutiondepthwise.comp | 8 +- .../shader/deconvolutiondepthwise_group.comp | 4 +- ...deconvolutiondepthwise_group_pack1to4.comp | 4 +- ...deconvolutiondepthwise_group_pack1to8.comp | 4 
+- .../deconvolutiondepthwise_group_pack4.comp | 4 +- ...deconvolutiondepthwise_group_pack4to1.comp | 4 +- ...deconvolutiondepthwise_group_pack4to8.comp | 4 +- .../deconvolutiondepthwise_group_pack8.comp | 4 +- ...deconvolutiondepthwise_group_pack8to1.comp | 4 +- ...deconvolutiondepthwise_group_pack8to4.comp | 4 +- .../shader/deconvolutiondepthwise_pack4.comp | 8 +- .../shader/deconvolutiondepthwise_pack8.comp | 8 +- src/layer/vulkan/shader/deepcopy.comp | 17 +-- src/layer/vulkan/shader/deepcopy_pack4.comp | 17 +-- src/layer/vulkan/shader/deepcopy_pack8.comp | 17 +-- src/layer/vulkan/shader/dropout.comp | 31 +--- src/layer/vulkan/shader/dropout_pack4.comp | 31 +--- src/layer/vulkan/shader/dropout_pack8.comp | 31 +--- src/layer/vulkan/shader/eltwise.comp | 38 +---- src/layer/vulkan/shader/eltwise_pack4.comp | 38 +---- src/layer/vulkan/shader/eltwise_pack8.comp | 38 +---- src/layer/vulkan/shader/flatten.comp | 7 +- src/layer/vulkan/shader/flatten_pack1to4.comp | 13 +- src/layer/vulkan/shader/flatten_pack1to8.comp | 21 ++- src/layer/vulkan/shader/flatten_pack4.comp | 13 +- src/layer/vulkan/shader/flatten_pack4to8.comp | 21 ++- src/layer/vulkan/shader/flatten_pack8.comp | 21 ++- src/layer/vulkan/shader/hardsigmoid.comp | 27 +--- .../vulkan/shader/hardsigmoid_pack4.comp | 31 +--- .../vulkan/shader/hardsigmoid_pack8.comp | 31 +--- src/layer/vulkan/shader/hardswish.comp | 31 +--- src/layer/vulkan/shader/hardswish_pack4.comp | 31 +--- src/layer/vulkan/shader/hardswish_pack8.comp | 31 +--- src/layer/vulkan/shader/innerproduct.comp | 14 +- .../vulkan/shader/innerproduct_pack1to4.comp | 16 +- .../vulkan/shader/innerproduct_pack1to8.comp | 16 +- .../vulkan/shader/innerproduct_pack4.comp | 22 +-- .../vulkan/shader/innerproduct_pack4to1.comp | 16 +- .../vulkan/shader/innerproduct_pack4to8.comp | 30 ++-- .../vulkan/shader/innerproduct_pack8.comp | 30 ++-- .../vulkan/shader/innerproduct_pack8to1.comp | 16 +- .../vulkan/shader/innerproduct_pack8to4.comp | 22 +-- 
.../vulkan/shader/instancenorm_coeffs.comp | 22 +-- .../shader/instancenorm_coeffs_pack4.comp | 22 +-- .../shader/instancenorm_coeffs_pack8.comp | 26 ++-- .../vulkan/shader/instancenorm_norm.comp | 6 +- .../shader/instancenorm_norm_pack4.comp | 6 +- .../shader/instancenorm_norm_pack8.comp | 6 +- .../shader/instancenorm_reduce_mean.comp | 4 +- .../instancenorm_reduce_mean_pack4.comp | 4 +- .../instancenorm_reduce_mean_pack8.comp | 6 +- .../shader/instancenorm_sub_mean_square.comp | 4 +- .../instancenorm_sub_mean_square_pack4.comp | 4 +- .../instancenorm_sub_mean_square_pack8.comp | 4 +- src/layer/vulkan/shader/normalize_coeffs.comp | 4 +- .../vulkan/shader/normalize_coeffs_pack4.comp | 6 +- .../vulkan/shader/normalize_coeffs_pack8.comp | 6 +- src/layer/vulkan/shader/normalize_norm.comp | 12 +- .../vulkan/shader/normalize_norm_pack4.comp | 12 +- .../vulkan/shader/normalize_norm_pack8.comp | 12 +- src/layer/vulkan/shader/packing.comp | 12 +- .../vulkan/shader/packing_fp16_to_fp32.comp | 12 +- .../vulkan/shader/packing_fp32_to_fp16.comp | 12 +- src/layer/vulkan/shader/packing_pack1to4.comp | 24 ++- .../shader/packing_pack1to4_fp16_to_fp32.comp | 24 ++- .../shader/packing_pack1to4_fp32_to_fp16.comp | 24 ++- src/layer/vulkan/shader/packing_pack1to8.comp | 40 +++-- .../shader/packing_pack1to8_fp16_to_fp32.comp | 44 +++--- .../shader/packing_pack1to8_fp32_to_fp16.comp | 40 +++-- src/layer/vulkan/shader/packing_pack4.comp | 12 +- .../shader/packing_pack4_fp16_to_fp32.comp | 12 +- .../shader/packing_pack4_fp32_to_fp16.comp | 12 +- src/layer/vulkan/shader/packing_pack4to1.comp | 24 ++- .../shader/packing_pack4to1_fp16_to_fp32.comp | 24 ++- .../shader/packing_pack4to1_fp32_to_fp16.comp | 24 ++- src/layer/vulkan/shader/packing_pack4to8.comp | 16 +- .../shader/packing_pack4to8_fp16_to_fp32.comp | 16 +- .../shader/packing_pack4to8_fp32_to_fp16.comp | 16 +- src/layer/vulkan/shader/packing_pack8.comp | 12 +- .../shader/packing_pack8_fp16_to_fp32.comp | 26 ++-- 
.../shader/packing_pack8_fp32_to_fp16.comp | 12 +- src/layer/vulkan/shader/packing_pack8to1.comp | 40 +++-- .../shader/packing_pack8to1_fp16_to_fp32.comp | 40 +++-- .../shader/packing_pack8to1_fp32_to_fp16.comp | 40 +++-- src/layer/vulkan/shader/packing_pack8to4.comp | 16 +- .../shader/packing_pack8to4_fp16_to_fp32.comp | 16 +- .../shader/packing_pack8to4_fp32_to_fp16.comp | 16 +- src/layer/vulkan/shader/padding.comp | 4 +- src/layer/vulkan/shader/padding_pack4.comp | 4 +- src/layer/vulkan/shader/padding_pack8.comp | 4 +- src/layer/vulkan/shader/permute.comp | 6 +- src/layer/vulkan/shader/permute_pack1to4.comp | 20 ++- src/layer/vulkan/shader/permute_pack1to8.comp | 36 +++-- src/layer/vulkan/shader/permute_pack4.comp | 12 +- src/layer/vulkan/shader/permute_pack4to1.comp | 20 ++- src/layer/vulkan/shader/permute_pack4to8.comp | 20 ++- src/layer/vulkan/shader/permute_pack8.comp | 20 ++- src/layer/vulkan/shader/permute_pack8to1.comp | 36 +++-- src/layer/vulkan/shader/permute_pack8to4.comp | 12 +- src/layer/vulkan/shader/pooling_global.comp | 4 +- .../vulkan/shader/pooling_global_pack4.comp | 4 +- .../vulkan/shader/pooling_global_pack8.comp | 4 +- src/layer/vulkan/shader/prelu.comp | 35 +---- src/layer/vulkan/shader/prelu_pack4.comp | 35 +---- src/layer/vulkan/shader/prelu_pack8.comp | 35 +---- src/layer/vulkan/shader/relu.comp | 31 +--- src/layer/vulkan/shader/relu_pack4.comp | 31 +--- src/layer/vulkan/shader/relu_pack8.comp | 31 +--- src/layer/vulkan/shader/reshape.comp | 12 +- src/layer/vulkan/shader/reshape_pack1to4.comp | 24 ++- src/layer/vulkan/shader/reshape_pack1to8.comp | 40 +++-- src/layer/vulkan/shader/reshape_pack4.comp | 24 ++- src/layer/vulkan/shader/reshape_pack4to1.comp | 24 ++- src/layer/vulkan/shader/reshape_pack4to8.comp | 40 +++-- src/layer/vulkan/shader/reshape_pack8.comp | 40 +++-- src/layer/vulkan/shader/reshape_pack8to1.comp | 40 +++-- src/layer/vulkan/shader/reshape_pack8to4.comp | 24 ++- src/layer/vulkan/shader/scale.comp | 39 +---- 
src/layer/vulkan/shader/scale_pack4.comp | 39 +---- src/layer/vulkan/shader/scale_pack8.comp | 39 +---- src/layer/vulkan/shader/sigmoid.comp | 31 +--- src/layer/vulkan/shader/sigmoid_pack4.comp | 31 +--- src/layer/vulkan/shader/sigmoid_pack8.comp | 31 +--- src/layer/vulkan/shader/slice.comp | 28 +--- src/layer/vulkan/shader/slice_pack1to4.comp | 90 +++++------- src/layer/vulkan/shader/slice_pack1to8.comp | 138 ++++++++---------- src/layer/vulkan/shader/slice_pack4.comp | 28 +--- src/layer/vulkan/shader/slice_pack4to8.comp | 66 +++------ src/layer/vulkan/shader/slice_pack8.comp | 28 +--- src/layer/vulkan/shader/softmax_div_sum.comp | 38 ++--- .../vulkan/shader/softmax_div_sum_pack4.comp | 38 ++--- .../vulkan/shader/softmax_div_sum_pack8.comp | 38 ++--- .../vulkan/shader/softmax_exp_sub_max.comp | 38 ++--- .../shader/softmax_exp_sub_max_pack4.comp | 38 ++--- .../shader/softmax_exp_sub_max_pack8.comp | 38 ++--- .../vulkan/shader/softmax_reduce_max.comp | 23 ++- .../shader/softmax_reduce_max_pack4.comp | 23 ++- .../shader/softmax_reduce_max_pack8.comp | 23 ++- .../vulkan/shader/softmax_reduce_sum.comp | 23 ++- .../shader/softmax_reduce_sum_pack4.comp | 23 ++- .../shader/softmax_reduce_sum_pack8.comp | 23 ++- src/layer/vulkan/shader/tanh.comp | 31 +--- src/layer/vulkan/shader/tanh_pack4.comp | 31 +--- src/layer/vulkan/shader/tanh_pack8.comp | 31 +--- src/layer/vulkan/shader/unaryop.comp | 31 +--- src/layer/vulkan/shader/unaryop_pack4.comp | 31 +--- src/layer/vulkan/shader/unaryop_pack8.comp | 31 +--- src/mat.h | 12 +- src/net.cpp | 6 - tests/test_cast.cpp | 6 - tests/test_packing.cpp | 9 -- 221 files changed, 1515 insertions(+), 3712 deletions(-) diff --git a/src/allocator.cpp b/src/allocator.cpp index 0e68bc282..4b94923d7 100644 --- a/src/allocator.cpp +++ b/src/allocator.cpp @@ -375,13 +375,13 @@ VkDeviceMemory VkAllocator::allocate_dedicated_memory(size_t size, uint32_t memo return memory; } -VkImage VkAllocator::create_image(VkImageType type, int width, int height, 
int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage) +VkImage VkAllocator::create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage) { VkImageCreateInfo imageCreateInfo; imageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, imageCreateInfo.pNext = 0; imageCreateInfo.flags = 0; - imageCreateInfo.imageType = type; + imageCreateInfo.imageType = VK_IMAGE_TYPE_3D; imageCreateInfo.format = format; imageCreateInfo.extent.width = width; imageCreateInfo.extent.height = height; @@ -400,21 +400,21 @@ VkImage VkAllocator::create_image(VkImageType type, int width, int height, int d VkResult ret = vkCreateImage(vkdev->vkdevice(), &imageCreateInfo, 0, &image); if (ret != VK_SUCCESS) { - NCNN_LOGE("vkCreateImage failed %d %d %d %d %d %d %d %d", ret, type, width, height, depth, format, tiling, usage); + NCNN_LOGE("vkCreateImage failed %d %d %d %d %d %d %d", ret, width, height, depth, format, tiling, usage); return 0; } return image; } -VkImageView VkAllocator::create_imageview(VkImageViewType type, VkImage image, VkFormat format) +VkImageView VkAllocator::create_imageview(VkImage image, VkFormat format) { VkImageViewCreateInfo imageViewCreateInfo; imageViewCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; imageViewCreateInfo.pNext = 0; imageViewCreateInfo.flags = 0; imageViewCreateInfo.image = image; - imageViewCreateInfo.viewType = type; + imageViewCreateInfo.viewType = VK_IMAGE_VIEW_TYPE_3D; imageViewCreateInfo.format = format; imageViewCreateInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; imageViewCreateInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; @@ -709,7 +709,7 @@ void VkBlobAllocator::fastFree(VkBufferMemory* ptr) delete ptr; } -VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack) +VkImageMemory* VkBlobAllocator::fastMalloc(int w, int h, int c, size_t elemsize, int elempack) { if (elempack != 1 && elempack != 4 && elempack 
!= 8) { @@ -743,48 +743,16 @@ VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t // large elempack spills on image w if (elempack == 8) width *= 2; - VkImageType image_type; - VkImageViewType imageview_type; - if (dims == 1) + if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d) { - image_type = VK_IMAGE_TYPE_1D; - imageview_type = VK_IMAGE_VIEW_TYPE_1D; - - if (width > (int)vkdev->info.max_image_dimension_1d) - { - NCNN_LOGE("image dimension too large %d > %d", width, (int)vkdev->info.max_image_dimension_1d); - return 0; - } - } - else if (dims == 2) - { - image_type = VK_IMAGE_TYPE_2D; - imageview_type = VK_IMAGE_VIEW_TYPE_2D; - - if (width > (int)vkdev->info.max_image_dimension_2d || height > (int)vkdev->info.max_image_dimension_2d) - { - NCNN_LOGE("image dimension too large %d %d > %d", width, height, (int)vkdev->info.max_image_dimension_2d); - return 0; - } - } - else // if (dims == 3) - { - image_type = VK_IMAGE_TYPE_3D; - imageview_type = VK_IMAGE_VIEW_TYPE_3D; - - if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d) - { - NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d); - return 0; - } + NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d); + return 0; } VkImageMemory* ptr = new VkImageMemory; - ptr->image = create_image(image_type, width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); + ptr->image = create_image(width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | 
VK_IMAGE_USAGE_TRANSFER_DST_BIT); - ptr->image_type = image_type; - ptr->imageview_type = imageview_type; ptr->width = width; ptr->height = height; ptr->depth = depth; @@ -827,7 +795,7 @@ VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t // do not allow host access to optimal tiling image ptr->mapped_ptr = 0; - ptr->imageview = create_imageview(imageview_type, ptr->image, format); + ptr->imageview = create_imageview(ptr->image, format); ptr->access_flags = 0; ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -894,7 +862,7 @@ VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t // do not allow host access to optimal tiling image ptr->mapped_ptr = 0; - ptr->imageview = create_imageview(imageview_type, ptr->image, format); + ptr->imageview = create_imageview(ptr->image, format); ptr->access_flags = 0; ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -1244,7 +1212,7 @@ void VkWeightAllocator::fastFree(VkBufferMemory* ptr) delete ptr; } -VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack) +VkImageMemory* VkWeightAllocator::fastMalloc(int w, int h, int c, size_t elemsize, int elempack) { if (elempack != 1 && elempack != 4 && elempack != 8 && elempack != 16 && elempack != 32 && elempack != 64) { @@ -1287,48 +1255,16 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size if (elempack == 32) width *= 8; if (elempack == 64) width *= 16; - VkImageType image_type; - VkImageViewType imageview_type; - if (dims == 1) + if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d) { - image_type = VK_IMAGE_TYPE_1D; - imageview_type = VK_IMAGE_VIEW_TYPE_1D; - - if (width > (int)vkdev->info.max_image_dimension_1d) - { - NCNN_LOGE("image dimension too large %d > %d", width, (int)vkdev->info.max_image_dimension_1d); - return 0; - } - } - else if (dims 
== 2) - { - image_type = VK_IMAGE_TYPE_2D; - imageview_type = VK_IMAGE_VIEW_TYPE_2D; - - if (width > (int)vkdev->info.max_image_dimension_2d || height > (int)vkdev->info.max_image_dimension_2d) - { - NCNN_LOGE("image dimension too large %d %d > %d", width, height, (int)vkdev->info.max_image_dimension_2d); - return 0; - } - } - else // if (dims == 3) - { - image_type = VK_IMAGE_TYPE_3D; - imageview_type = VK_IMAGE_VIEW_TYPE_3D; - - if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d) - { - NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d); - return 0; - } + NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d); + return 0; } VkImageMemory* ptr = new VkImageMemory; - ptr->image = create_image(image_type, width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); + ptr->image = create_image(width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); - ptr->image_type = image_type; - ptr->imageview_type = imageview_type; ptr->width = width; ptr->height = height; ptr->depth = depth; @@ -1385,7 +1321,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size // do not allow host access to optimal tiling image ptr->mapped_ptr = 0; - ptr->imageview = create_imageview(imageview_type, ptr->image, format); + ptr->imageview = create_imageview(ptr->image, format); ptr->access_flags = 0; ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -1426,7 +1362,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size // do not allow host access to optimal tiling image ptr->mapped_ptr = 0; - ptr->imageview = create_imageview(imageview_type, ptr->image, format); + ptr->imageview = 
create_imageview(ptr->image, format); ptr->access_flags = 0; ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -1482,7 +1418,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size // do not allow host access to optimal tiling image ptr->mapped_ptr = 0; - ptr->imageview = create_imageview(imageview_type, ptr->image, format); + ptr->imageview = create_imageview(ptr->image, format); ptr->access_flags = 0; ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -1612,36 +1548,16 @@ void VkStagingAllocator::fastFree(VkBufferMemory* ptr) buffer_budgets.push_back(ptr); } -VkImageMemory* VkStagingAllocator::fastMalloc(int dims, int w, int h, int c, size_t elemsize, int /* elempack */) +VkImageMemory* VkStagingAllocator::fastMalloc(int w, int h, int c, size_t elemsize, int /* elempack */) { // staging image is mainly used for storing small piece of dynamic parameters // we allocate host memory as a fake image, it's simple and good const size_t size = w * h * c * elemsize; - VkImageType image_type; - VkImageViewType imageview_type; - if (dims == 1) - { - image_type = VK_IMAGE_TYPE_1D; - imageview_type = VK_IMAGE_VIEW_TYPE_1D; - } - else if (dims == 2) - { - image_type = VK_IMAGE_TYPE_2D; - imageview_type = VK_IMAGE_VIEW_TYPE_2D; - } - else // if (dims == 3) - { - image_type = VK_IMAGE_TYPE_3D; - imageview_type = VK_IMAGE_VIEW_TYPE_3D; - } - VkImageMemory* ptr = new VkImageMemory; ptr->image = 0; - ptr->image_type = image_type; - ptr->imageview_type = imageview_type; ptr->width = w; ptr->height = h; ptr->depth = c; @@ -1746,7 +1662,7 @@ VkAndroidHardwareBufferImageAllocator::~VkAndroidHardwareBufferImageAllocator() } } -VkImageMemory* VkAndroidHardwareBufferImageAllocator::fastMalloc(int /*dims*/, int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/) +VkImageMemory* VkAndroidHardwareBufferImageAllocator::fastMalloc(int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/) { VkResult ret; diff --git a/src/allocator.h 
b/src/allocator.h index 377da449a..527c93196 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -224,8 +224,6 @@ public: VkImageView imageview; // underlying info assigned by allocator - VkImageType image_type; - VkImageViewType imageview_type; int width; int height; int depth; @@ -267,7 +265,7 @@ public: virtual int flush(VkBufferMemory* ptr); virtual int invalidate(VkBufferMemory* ptr); - virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack) = 0; + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0; virtual void fastFree(VkImageMemory* ptr) = 0; public: @@ -282,8 +280,8 @@ protected: VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index); VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer); - VkImage create_image(VkImageType type, int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage); - VkImageView create_imageview(VkImageViewType type, VkImage image, VkFormat format); + VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage); + VkImageView create_imageview(VkImage image, VkFormat format); }; class VkBlobAllocator : public VkAllocator @@ -298,7 +296,7 @@ public: virtual VkBufferMemory* fastMalloc(size_t size); virtual void fastFree(VkBufferMemory* ptr); - virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); virtual void fastFree(VkImageMemory* ptr); protected: @@ -324,7 +322,7 @@ public: public: virtual VkBufferMemory* fastMalloc(size_t size); virtual void fastFree(VkBufferMemory* ptr); - virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int 
elempack); virtual void fastFree(VkImageMemory* ptr); protected: @@ -355,7 +353,7 @@ public: virtual VkBufferMemory* fastMalloc(size_t size); virtual void fastFree(VkBufferMemory* ptr); - virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); virtual void fastFree(VkImageMemory* ptr); protected: @@ -372,7 +370,7 @@ public: public: virtual VkBufferMemory* fastMalloc(size_t size); virtual void fastFree(VkBufferMemory* ptr); - virtual VkImageMemory* fastMalloc(int /*dims*/, int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/) + virtual VkImageMemory* fastMalloc(int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/) { return 0; } @@ -392,7 +390,7 @@ public: virtual ~VkAndroidHardwareBufferImageAllocator(); public: - virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack); + virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); virtual void fastFree(VkImageMemory* ptr); virtual VkBufferMemory* fastMalloc(size_t /*size*/) { diff --git a/src/gpu.cpp b/src/gpu.cpp index aef580333..bcb82a234 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -641,16 +641,9 @@ int create_gpu_instance() // 650 = 0x5143 0x6050002 gpu_info.bug_storage_buffer_no_l1 = false; - gpu_info.bug_layout_binding_id_alias = false; gpu_info.bug_corrupted_online_pipeline_cache = false; gpu_info.bug_implicit_fp16_arithmetic = false; - if (physicalDeviceProperties.vendorID == 0x5143 && physicalDeviceProperties.apiVersion < VK_MAKE_VERSION(1, 0, 49)) - { - // qcom adreno with old buggy driver cannot handle binding id alias - gpu_info.bug_layout_binding_id_alias = true; - } - if (physicalDeviceProperties.vendorID == 0x5143 && physicalDeviceProperties.apiVersion < VK_MAKE_VERSION(1, 0, 66)) { // qcom adreno with old buggy driver cannot share created pipeline properly @@ -668,22 
+661,6 @@ int create_gpu_instance() // gpu_info.bug_storage_buffer_no_l1 = true; } - if (physicalDeviceProperties.vendorID == 0x13b5 - && (physicalDeviceProperties.deviceID == 0x7500001 - || physicalDeviceProperties.deviceID == 0x8602000 - || physicalDeviceProperties.deviceID == 0x8800020)) - { - // these arm mali midgard era driver cannot handle binding id alias - gpu_info.bug_layout_binding_id_alias = true; - } - -#if __APPLE__ - { - // metal shader never accept binding id alias - gpu_info.bug_layout_binding_id_alias = true; - } -#endif - if (physicalDeviceProperties.vendorID == 0x13b5 && (physicalDeviceProperties.deviceID == 0x7500001 || physicalDeviceProperties.deviceID == 0x8602000 @@ -997,8 +974,8 @@ int create_gpu_instance() gpu_info.graphics_queue_family_index, gpu_info.graphics_queue_count, gpu_info.transfer_queue_family_index, gpu_info.transfer_queue_count); - NCNN_LOGE("[%u %s] bugsbn1=%d buglbia=%d bugcopc=%d bugihfa=%d", i, physicalDeviceProperties.deviceName, - gpu_info.bug_storage_buffer_no_l1, gpu_info.bug_layout_binding_id_alias, gpu_info.bug_corrupted_online_pipeline_cache, gpu_info.bug_implicit_fp16_arithmetic); + NCNN_LOGE("[%u %s] bugsbn1=%d bugcopc=%d bugihfa=%d", i, physicalDeviceProperties.deviceName, + gpu_info.bug_storage_buffer_no_l1, gpu_info.bug_corrupted_online_pipeline_cache, gpu_info.bug_implicit_fp16_arithmetic); NCNN_LOGE("[%u %s] fp16p=%d fp16s=%d fp16a=%d int8s=%d int8a=%d", i, physicalDeviceProperties.deviceName, gpu_info.support_fp16_packed, gpu_info.support_fp16_storage, gpu_info.support_fp16_arithmetic, @@ -2031,12 +2008,6 @@ void VulkanDevice::convert_packing(const VkMat& src, VkMat& dst, int dst_elempac void VulkanDevice::convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const { - if (info.bug_layout_binding_id_alias) - { - NCNN_LOGE("cannot convert_packing i2i"); - return; - } - int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 
1 : 0; int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2; @@ -2069,12 +2040,6 @@ void VulkanDevice::convert_packing(const VkImageMat& src, VkImageMat& dst, int d void VulkanDevice::convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const { - if (info.bug_layout_binding_id_alias) - { - NCNN_LOGE("cannot convert_packing b2i"); - return; - } - int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0; int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2; @@ -2107,12 +2072,6 @@ void VulkanDevice::convert_packing(const VkMat& src, VkImageMat& dst, int dst_el void VulkanDevice::convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const { - if (info.bug_layout_binding_id_alias) - { - NCNN_LOGE("cannot convert_packing i2b"); - return; - } - int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0; int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 
1 : 2; @@ -2359,12 +2318,6 @@ const ncnn::Packing_vulkan* VulkanDevice::get_utility_operator(int storage_type_ opt.use_fp16_packed = (cast_type_from_index == 1 || cast_type_to_index == 1); opt.use_fp16_storage = (cast_type_from_index == 2 || cast_type_to_index == 2); - if (info.bug_layout_binding_id_alias && opt.use_image_storage) - { - NCNN_LOGE("cannot create uop with use_image_storage if bug_layout_binding_id_alias"); - return 0; - } - if (!info.support_fp16_packed && opt.use_fp16_packed) { NCNN_LOGE("cannot create uop with use_fp16_packed if not support_fp16_packed"); @@ -2424,8 +2377,6 @@ void VulkanDevice::destroy_utility_operator() for (int i1 = 0; i1 < 2; i1++) { opt.use_image_storage = (i0 == 1 || i1 == 1); - if (info.bug_layout_binding_id_alias && opt.use_image_storage) - continue; // from fp32-b/i | fp16p-b/i | fp16s-b/i // to fp32-b/i | fp16p-b/i | fp16s-b/i diff --git a/src/gpu.h b/src/gpu.h index 07ba9d268..68ef3a466 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -129,7 +129,6 @@ public: // bug is not feature bool bug_storage_buffer_no_l1; - bool bug_layout_binding_id_alias; bool bug_corrupted_online_pipeline_cache; // but sometimes bug is a feature diff --git a/src/layer/vulkan/shader/absval.comp b/src/layer/vulkan/shader/absval.comp index 75f8f4768..0fe9a80a1 100644 --- a/src/layer/vulkan/shader/absval.comp +++ b/src/layer/vulkan/shader/absval.comp @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) 
buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; @@ -58,19 +54,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -80,18 +64,7 @@ void main() v = abs(v); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/absval_pack4.comp b/src/layer/vulkan/shader/absval_pack4.comp index d3f106b6b..15d9d4297 100644 --- a/src/layer/vulkan/shader/absval_pack4.comp +++ b/src/layer/vulkan/shader/absval_pack4.comp @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; @@ -58,19 +54,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = 
image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -80,18 +64,7 @@ void main() v = abs(v); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/absval_pack8.comp b/src/layer/vulkan/shader/absval_pack8.comp index 30a153276..25f5b4ce8 100644 --- a/src/layer/vulkan/shader/absval_pack8.comp +++ b/src/layer/vulkan/shader/absval_pack8.comp @@ -30,11 +30,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; @@ -59,19 +55,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) 
+ gx; @@ -82,18 +66,7 @@ void main() v[1] = abs(v[1]); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/batchnorm.comp b/src/layer/vulkan/shader/batchnorm.comp index c3ff53a2a..3fd4fa8dc 100644 --- a/src/layer/vulkan/shader/batchnorm.comp +++ b/src/layer/vulkan/shader/batchnorm.comp @@ -29,14 +29,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D a; -layout (binding = 3) uniform unfp sampler1D b; +layout (binding = 2) uniform unfp sampler3D a; +layout (binding = 3) uniform unfp sampler3D b; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer a { sfp a_data[]; }; @@ -62,19 +58,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -85,8 
+69,8 @@ void main() const int abi = gxyz[psc(dims) - 1]; #if NCNN_image_shader - afp b = image1d_ld1(b, abi); - afp a = image1d_ld1(a, abi); + afp b = image3d_ld1(b, ivec3(abi, 0, 0)); + afp a = image3d_ld1(a, ivec3(abi, 0, 0)); #else afp b = buffer_ld1(b_data, abi); afp a = buffer_ld1(a_data, abi); @@ -95,18 +79,7 @@ void main() v = b * v + a; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/batchnorm_pack4.comp b/src/layer/vulkan/shader/batchnorm_pack4.comp index 6924960e3..ff7717de4 100644 --- a/src/layer/vulkan/shader/batchnorm_pack4.comp +++ b/src/layer/vulkan/shader/batchnorm_pack4.comp @@ -29,14 +29,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D a; -layout (binding = 3) uniform unfp sampler1D b; +layout (binding = 2) uniform unfp sampler3D a; +layout (binding = 3) uniform unfp sampler3D b; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer a { sfpvec4 a_data[]; }; @@ -62,19 +58,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = 
image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -85,8 +69,8 @@ void main() const int abi = gxyz[psc(dims) - 1]; #if NCNN_image_shader - afpvec4 b = image1d_ld4(b, abi); - afpvec4 a = image1d_ld4(a, abi); + afpvec4 b = image3d_ld4(b, ivec3(abi, 0, 0)); + afpvec4 a = image3d_ld4(a, ivec3(abi, 0, 0)); #else afpvec4 b = buffer_ld4(b_data, abi); afpvec4 a = buffer_ld4(a_data, abi); @@ -95,18 +79,7 @@ void main() v = b * v + a; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/batchnorm_pack8.comp b/src/layer/vulkan/shader/batchnorm_pack8.comp index 02e5eba6b..2b38a493e 100644 --- a/src/layer/vulkan/shader/batchnorm_pack8.comp +++ b/src/layer/vulkan/shader/batchnorm_pack8.comp @@ -30,14 +30,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D a; -layout (binding = 3) uniform unfp sampler1D 
b; +layout (binding = 2) uniform unfp sampler3D a; +layout (binding = 3) uniform unfp sampler3D b; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer a { sfpvec8 a_data[]; }; @@ -63,19 +59,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -86,8 +70,8 @@ void main() const int abi = gxyz[psc(dims) - 1]; #if NCNN_image_shader - afpvec8 b = image1d_ld8(b, abi); - afpvec8 a = image1d_ld8(a, abi); + afpvec8 b = image3d_ld8(b, ivec3(abi, 0, 0)); + afpvec8 a = image3d_ld8(a, ivec3(abi, 0, 0)); #else afpvec8 b = buffer_ld8(b_data, abi); afpvec8 a = buffer_ld8(a_data, abi); @@ -97,18 +81,7 @@ void main() v[1] = b[1] * v[1] + a[1]; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/binaryop.comp b/src/layer/vulkan/shader/binaryop.comp index 85f2d54fb..97189beaf 100644 --- a/src/layer/vulkan/shader/binaryop.comp +++ b/src/layer/vulkan/shader/binaryop.comp @@ -45,14 +45,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D a_blob_1d; -layout (binding = 0) uniform unfp sampler2D a_blob_2d; layout (binding = 0) uniform unfp sampler3D 
a_blob_3d; -layout (binding = 1) uniform unfp sampler1D b_blob_1d; -layout (binding = 1) uniform unfp sampler2D b_blob_2d; layout (binding = 1) uniform unfp sampler3D b_blob_3d; -layout (binding = 2, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 2, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer a_blob { sfp a_blob_data[]; }; @@ -91,20 +85,7 @@ void main() return; #if NCNN_image_shader - afp v1; - - if (psc(outdims) == 1) - { - v1 = image1d_ld1(a_blob_1d, gx); - } - else if (psc(outdims) == 2) - { - v1 = image2d_ld1(a_blob_2d, ivec2(gx, gy)); - } - else // if (psc(outdims) == 3) - { - v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); - } + afp v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; @@ -129,18 +110,7 @@ void main() if (op_type == 8) res = b / v1; #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st1(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st1(a_blob_data, gi, res); #endif @@ -149,20 +119,7 @@ void main() { // type 7 13 19 #if NCNN_image_shader - afp v2; - - if (psc(outdims) == 1) - { - v2 = image1d_ld1(b_blob_1d, gx); - } - else if (psc(outdims) == 2) - { - v2 = image2d_ld1(b_blob_2d, ivec2(gx, gy)); - } - else // if (psc(outdims) == 3) - { - v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); - } + afp v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); #else afp v2 = buffer_ld1(b_blob_data, gi); #endif @@ -178,18 +135,7 @@ void main() if (op_type == 8) res = v2 / v1; #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st1(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, 
gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st1(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/binaryop_broadcast.comp b/src/layer/vulkan/shader/binaryop_broadcast.comp index e9a79be7d..f92f316bc 100644 --- a/src/layer/vulkan/shader/binaryop_broadcast.comp +++ b/src/layer/vulkan/shader/binaryop_broadcast.comp @@ -43,14 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D a_blob_1d; -layout (binding = 0) uniform unfp sampler2D a_blob_2d; layout (binding = 0) uniform unfp sampler3D a_blob_3d; -layout (binding = 1) uniform unfp sampler1D b_blob_1d; -layout (binding = 1) uniform unfp sampler2D b_blob_2d; layout (binding = 1) uniform unfp sampler3D b_blob_3d; -layout (binding = 2, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 2, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer a_blob { sfp a_blob_data[]; }; @@ -89,8 +83,12 @@ void main() return; #if NCNN_image_shader - afp v1; - afp v2; + int ax = gx; + int ay = gy; + int az = gz; + int bx = gx; + int by = gy; + int bz = gz; if (psc(adims) == 3) { @@ -99,37 +97,36 @@ void main() if (psc(bw) == 1 && psc(bh) == 1) { // special type 1 - v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image3d_ld1(b_blob_3d, ivec3(0, 0, gz)); + bx = 0; + by = 0; } if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1) { // special type 2 - v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, 0)); + bz = 0; } if (psc(aw) == 1 && psc(ah) == 1) { // special type 3 - v1 = image3d_ld1(a_blob_3d, ivec3(0, 0, gz)); - v2 = 
image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); + ax = 0; + ay = 0; } if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1) { // special type 4 - v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, 0)); - v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); + az = 0; } } if (psc(bdims) == 2) { // type 18 - v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image2d_ld1(b_blob_2d, ivec2(gy, gz)); + bx = gy; + by = gz; + bz = 0; } if (psc(bdims) == 1) @@ -137,14 +134,16 @@ void main() if (psc(bw) == 1) { // type 16 - v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image1d_ld1(b_blob_1d, 0); + bx = 0; + by = 0; + bz = 0; } else { // type 17 - v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image1d_ld1(b_blob_1d, gz); + bx = gz; + by = 0; + bz = 0; } } } @@ -153,8 +152,9 @@ void main() if (psc(bdims) == 3) { // type 14 - v1 = image2d_ld1(a_blob_2d, ivec2(gy, gz)); - v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); + ax = gy; + ay = gz; + az = 0; } if (psc(bdims) == 1) @@ -162,67 +162,61 @@ void main() if (psc(bw) == 1) { // type 11 - v1 = image2d_ld1(a_blob_2d, ivec2(gx, gy)); - v2 = image1d_ld1(b_blob_1d, 0); + bx = 0; + by = 0; + bz = 0; } else { // type 12 - v1 = image2d_ld1(a_blob_2d, ivec2(gx, gy)); - v2 = image1d_ld1(b_blob_1d, gy); + bx = gy; + by = 0; + bz = 0; } } } else if (psc(adims) == 1) { - if (psc(bdims) == 3) + if (psc(aw) == 1) { - if (psc(aw) == 1) - { - // type 4 - v1 = image1d_ld1(a_blob_1d, 0); - v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); - } - else - { - // type 9 - v1 = image1d_ld1(a_blob_1d, gz); - v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); - } + // type 2 3 4 + ax = 0; + ay = 0; + az = 0; } - - if (psc(bdims) == 2) + else { - if (psc(aw) == 1) + if (psc(bdims) == 3) { - // type 3 - v1 = image1d_ld1(a_blob_1d, 0); - v2 = image2d_ld1(b_blob_2d, ivec2(gx, gy)); + // type 9 + ax = gz; + ay = 0; + az = 0; } - else + + if (psc(bdims) == 2) { // type 8 - v1 = image1d_ld1(a_blob_1d, gy); - v2 = image2d_ld1(b_blob_2d, ivec2(gx, gy)); + ax 
= gy; + ay = 0; + az = 0; } - } - if (psc(bdims) == 1) - { - if (psc(aw) == 1) - { - // type 2 - v1 = image1d_ld1(a_blob_1d, 0); - v2 = image1d_ld1(b_blob_1d, gx); - } - else // if (psc(bw) == 1) + if (psc(bdims) == 1) { - // type 6 - v1 = image1d_ld1(a_blob_1d, gx); - v2 = image1d_ld1(b_blob_1d, 0); + if (psc(bw) == 1) + { + // type 6 + bx = 0; + by = 0; + bz = 0; + } } } } + + afp v1 = image3d_ld1(a_blob_3d, ivec3(ax, ay, az)); + afp v2 = image3d_ld1(b_blob_3d, ivec3(bx, by, bz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; @@ -363,18 +357,7 @@ void main() if (op_type == 8) res = v2 / v1; #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st1(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st1(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/binaryop_broadcast_a1_pack4.comp b/src/layer/vulkan/shader/binaryop_broadcast_a1_pack4.comp index f74cab016..aabe12d93 100644 --- a/src/layer/vulkan/shader/binaryop_broadcast_a1_pack4.comp +++ b/src/layer/vulkan/shader/binaryop_broadcast_a1_pack4.comp @@ -43,13 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D a_blob_1d; layout (binding = 0) uniform unfp sampler3D a_blob_3d; -layout (binding = 1) uniform unfp sampler1D b_blob_1d; -layout (binding = 1) uniform unfp sampler2D b_blob_2d; layout (binding = 1) uniform unfp sampler3D b_blob_3d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) 
readonly buffer a_blob { sfp a_blob_data[]; }; @@ -89,7 +84,6 @@ void main() #if NCNN_image_shader afpvec4 v1; - afpvec4 v2; if (psc(adims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1) { @@ -98,21 +92,10 @@ void main() } else { - v1 = afpvec4(image1d_ld1(a_blob_1d, 0)); + v1 = afpvec4(image3d_ld1(a_blob_3d, ivec3(0, 0, 0))); } - if (psc(bdims) == 1) - { - v2 = image1d_ld4(b_blob_1d, gx); - } - else if (psc(bdims) == 2) - { - v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy)); - } - else // if (psc(bdims) == 3) - { - v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; @@ -142,18 +125,7 @@ void main() if (op_type == 8) res = v2 / v1; #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st4(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st4(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/binaryop_broadcast_a1_pack8.comp b/src/layer/vulkan/shader/binaryop_broadcast_a1_pack8.comp index f08e03ff6..f44174803 100644 --- a/src/layer/vulkan/shader/binaryop_broadcast_a1_pack8.comp +++ b/src/layer/vulkan/shader/binaryop_broadcast_a1_pack8.comp @@ -44,13 +44,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D a_blob_1d; layout (binding = 0) uniform unfp sampler3D a_blob_3d; -layout (binding = 1) uniform unfp sampler1D b_blob_1d; -layout (binding = 1) uniform unfp sampler2D b_blob_2d; layout (binding = 1) uniform unfp sampler3D b_blob_3d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 
2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer a_blob { sfp a_blob_data[]; }; @@ -90,7 +85,6 @@ void main() #if NCNN_image_shader afpvec4 v1; - afpvec8 v2; if (psc(adims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1) { @@ -99,21 +93,10 @@ void main() } else { - v1 = afpvec4(image1d_ld1(a_blob_1d, 0)); + v1 = afpvec4(image3d_ld1(a_blob_3d, ivec3(0, 0, 0))); } - if (psc(bdims) == 1) - { - v2 = image1d_ld8(b_blob_1d, gx); - } - else if (psc(bdims) == 2) - { - v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy)); - } - else // if (psc(bdims) == 3) - { - v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; @@ -179,18 +162,7 @@ void main() } #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st8(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st8(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/binaryop_broadcast_b1_pack4.comp b/src/layer/vulkan/shader/binaryop_broadcast_b1_pack4.comp index e2464db72..9a1c31b76 100644 --- a/src/layer/vulkan/shader/binaryop_broadcast_b1_pack4.comp +++ b/src/layer/vulkan/shader/binaryop_broadcast_b1_pack4.comp @@ -43,13 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D a_blob_1d; -layout (binding = 0) uniform unfp sampler2D a_blob_2d; layout (binding = 0) uniform unfp sampler3D a_blob_3d; -layout (binding = 1) uniform unfp sampler1D b_blob_1d; layout (binding = 1) 
uniform unfp sampler3D b_blob_3d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer a_blob { sfpvec4 a_blob_data[]; }; @@ -88,7 +83,6 @@ void main() return; #if NCNN_image_shader - afpvec4 v1; afpvec4 v2; if (psc(bdims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1) @@ -98,21 +92,10 @@ void main() } else { - v2 = afpvec4(image1d_ld1(b_blob_1d, 0)); + v2 = afpvec4(image3d_ld1(b_blob_3d, ivec3(0, 0, 0))); } - if (psc(adims) == 1) - { - v1 = image1d_ld4(a_blob_1d, gx); - } - else if (psc(adims) == 2) - { - v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy)); - } - else // if (psc(adims) == 3) - { - v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; @@ -142,18 +125,7 @@ void main() if (op_type == 8) res = v2 / v1; #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st4(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st4(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/binaryop_broadcast_b1_pack8.comp b/src/layer/vulkan/shader/binaryop_broadcast_b1_pack8.comp index 97034deee..e2c98395f 100644 --- a/src/layer/vulkan/shader/binaryop_broadcast_b1_pack8.comp +++ b/src/layer/vulkan/shader/binaryop_broadcast_b1_pack8.comp @@ -44,13 +44,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D a_blob_1d; -layout (binding = 0) 
uniform unfp sampler2D a_blob_2d; layout (binding = 0) uniform unfp sampler3D a_blob_3d; -layout (binding = 1) uniform unfp sampler1D b_blob_1d; layout (binding = 1) uniform unfp sampler3D b_blob_3d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer a_blob { sfpvec8 a_blob_data[]; }; @@ -89,7 +84,6 @@ void main() return; #if NCNN_image_shader - afpvec8 v1; afpvec4 v2; if (psc(bdims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1) @@ -99,21 +93,10 @@ void main() } else { - v2 = afpvec4(image1d_ld1(b_blob_1d, 0)); + v2 = afpvec4(image3d_ld1(b_blob_3d, ivec3(0, 0, 0))); } - if (psc(adims) == 1) - { - v1 = image1d_ld8(a_blob_1d, gx); - } - else if (psc(adims) == 2) - { - v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy)); - } - else // if (psc(adims) == 3) - { - v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; @@ -179,18 +162,7 @@ void main() } #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st8(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st8(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/binaryop_broadcast_pack4.comp b/src/layer/vulkan/shader/binaryop_broadcast_pack4.comp index 65e55486a..136e798db 100644 --- a/src/layer/vulkan/shader/binaryop_broadcast_pack4.comp +++ b/src/layer/vulkan/shader/binaryop_broadcast_pack4.comp @@ -43,14 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; layout (constant_id = shape_constant_id_offset + 14) 
const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D a_blob_1d; -layout (binding = 0) uniform unfp sampler2D a_blob_2d; layout (binding = 0) uniform unfp sampler3D a_blob_3d; -layout (binding = 1) uniform unfp sampler1D b_blob_1d; -layout (binding = 1) uniform unfp sampler2D b_blob_2d; layout (binding = 1) uniform unfp sampler3D b_blob_3d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer a_blob { sfpvec4 a_blob_data[]; }; @@ -89,8 +83,12 @@ void main() return; #if NCNN_image_shader - afpvec4 v1; - afpvec4 v2; + int ax = gx; + int ay = gy; + int az = gz; + int bx = gx; + int by = gy; + int bz = gz; if (psc(adims) == 3) { @@ -99,37 +97,36 @@ void main() if (psc(bw) == 1 && psc(bh) == 1) { // special type 1 - v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image3d_ld4(b_blob_3d, ivec3(0, 0, gz)); + bx = 0; + by = 0; } if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1) { // special type 2 - v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, 0)); + bz = 0; } if (psc(aw) == 1 && psc(ah) == 1) { // special type 3 - v1 = image3d_ld4(a_blob_3d, ivec3(0, 0, gz)); - v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); + ax = 0; + ay = 0; } if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1) { // special type 4 - v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, 0)); - v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); + az = 0; } } if (psc(bdims) == 2) { // type 18 - v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image2d_ld4(b_blob_2d, ivec2(gy, gz)); + bx = gy; + by = gz; + bz = 0; } if (psc(bdims) == 1) @@ -137,14 +134,16 @@ void main() if (psc(bw) == 1) { // type 16 - v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image1d_ld4(b_blob_1d, 0); + bx = 0; + 
by = 0; + bz = 0; } else { // type 17 - v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image1d_ld4(b_blob_1d, gz); + bx = gz; + by = 0; + bz = 0; } } } @@ -153,8 +152,9 @@ void main() if (psc(bdims) == 3) { // type 14 - v1 = image2d_ld4(a_blob_2d, ivec2(gy, gz)); - v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); + ax = gy; + ay = gz; + az = 0; } if (psc(bdims) == 1) @@ -162,67 +162,61 @@ void main() if (psc(bw) == 1) { // type 11 - v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy)); - v2 = image1d_ld4(b_blob_1d, 0); + bx = 0; + by = 0; + bz = 0; } else { // type 12 - v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy)); - v2 = image1d_ld4(b_blob_1d, gy); + bx = gy; + by = 0; + bz = 0; } } } else if (psc(adims) == 1) { - if (psc(bdims) == 3) + if (psc(aw) == 1) { - if (psc(aw) == 1) - { - // type 4 - v1 = image1d_ld4(a_blob_1d, 0); - v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); - } - else - { - // type 9 - v1 = image1d_ld4(a_blob_1d, gz); - v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); - } + // type 2 3 4 + ax = 0; + ay = 0; + az = 0; } - - if (psc(bdims) == 2) + else { - if (psc(aw) == 1) + if (psc(bdims) == 3) { - // type 3 - v1 = image1d_ld4(a_blob_1d, 0); - v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy)); + // type 9 + ax = gz; + ay = 0; + az = 0; } - else + + if (psc(bdims) == 2) { // type 8 - v1 = image1d_ld4(a_blob_1d, gy); - v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy)); + ax = gy; + ay = 0; + az = 0; } - } - if (psc(bdims) == 1) - { - if (psc(aw) == 1) + if (psc(bdims) == 1) { - // type 2 - v1 = image1d_ld4(a_blob_1d, 0); - v2 = image1d_ld4(b_blob_1d, gx); - } - else // if (psc(bw) == 1) - { - // type 6 - v1 = image1d_ld4(a_blob_1d, gx); - v2 = image1d_ld4(b_blob_1d, 0); + if (psc(bw) == 1) + { + // type 6 + bx = 0; + by = 0; + bz = 0; + } } } } + + afpvec4 v1 = image3d_ld4(a_blob_3d, ivec3(ax, ay, az)); + afpvec4 v2 = image3d_ld4(b_blob_3d, ivec3(bx, by, bz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; @@ -312,18 +306,7 @@ void main() if (op_type == 
8) res = v2 / v1; #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st4(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st4(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/binaryop_broadcast_pack8.comp b/src/layer/vulkan/shader/binaryop_broadcast_pack8.comp index 3317113a6..3a7d1a574 100644 --- a/src/layer/vulkan/shader/binaryop_broadcast_pack8.comp +++ b/src/layer/vulkan/shader/binaryop_broadcast_pack8.comp @@ -44,14 +44,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D a_blob_1d; -layout (binding = 0) uniform unfp sampler2D a_blob_2d; layout (binding = 0) uniform unfp sampler3D a_blob_3d; -layout (binding = 1) uniform unfp sampler1D b_blob_1d; -layout (binding = 1) uniform unfp sampler2D b_blob_2d; layout (binding = 1) uniform unfp sampler3D b_blob_3d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer a_blob { sfpvec8 a_blob_data[]; }; @@ -90,8 +84,12 @@ void main() return; #if NCNN_image_shader - afpvec8 v1; - afpvec8 v2; + int ax = gx; + int ay = gy; + int az = gz; + int bx = gx; + int by = gy; + int bz = gz; if (psc(adims) == 3) { @@ -100,37 +98,36 @@ void main() if (psc(bw) == 1 && psc(bh) == 1) { // special type 1 - v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image3d_ld8(b_blob_3d, ivec3(0, 0, gz)); + bx = 0; + by = 0; } if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1) { // special type 2 - v1 = 
image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, 0)); + bz = 0; } if (psc(aw) == 1 && psc(ah) == 1) { // special type 3 - v1 = image3d_ld8(a_blob_3d, ivec3(0, 0, gz)); - v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); + ax = 0; + ay = 0; } if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1) { // special type 4 - v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, 0)); - v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); + az = 0; } } if (psc(bdims) == 2) { // type 18 - v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image2d_ld8(b_blob_2d, ivec2(gy, gz)); + bx = gy; + by = gz; + bz = 0; } if (psc(bdims) == 1) @@ -138,14 +135,16 @@ void main() if (psc(bw) == 1) { // type 16 - v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image1d_ld8(b_blob_1d, 0); + bx = 0; + by = 0; + bz = 0; } else { // type 17 - v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); - v2 = image1d_ld8(b_blob_1d, gz); + bx = gz; + by = 0; + bz = 0; } } } @@ -154,8 +153,9 @@ void main() if (psc(bdims) == 3) { // type 14 - v1 = image2d_ld8(a_blob_2d, ivec2(gy, gz)); - v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); + ax = gy; + ay = gz; + az = 0; } if (psc(bdims) == 1) @@ -163,67 +163,61 @@ void main() if (psc(bw) == 1) { // type 11 - v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy)); - v2 = image1d_ld8(b_blob_1d, 0); + bx = 0; + by = 0; + bz = 0; } else { // type 12 - v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy)); - v2 = image1d_ld8(b_blob_1d, gy); + bx = gy; + by = 0; + bz = 0; } } } else if (psc(adims) == 1) { - if (psc(bdims) == 3) + if (psc(aw) == 1) { - if (psc(aw) == 1) - { - // type 4 - v1 = image1d_ld8(a_blob_1d, 0); - v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); - } - else - { - // type 9 - v1 = image1d_ld8(a_blob_1d, gz); - v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); - } + // type 2 3 4 + ax = 0; + ay = 0; + az = 0; } - - if (psc(bdims) == 2) + else { - if (psc(aw) == 1) + if (psc(bdims) == 3) { - // type 3 - v1 = image1d_ld8(a_blob_1d, 0); - v2 = 
image2d_ld8(b_blob_2d, ivec2(gx, gy)); + // type 9 + ax = gz; + ay = 0; + az = 0; } - else + + if (psc(bdims) == 2) { // type 8 - v1 = image1d_ld8(a_blob_1d, gy); - v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy)); + ax = gy; + ay = 0; + az = 0; } - } - if (psc(bdims) == 1) - { - if (psc(aw) == 1) + if (psc(bdims) == 1) { - // type 2 - v1 = image1d_ld8(a_blob_1d, 0); - v2 = image1d_ld8(b_blob_1d, gx); - } - else // if (psc(bw) == 1) - { - // type 6 - v1 = image1d_ld8(a_blob_1d, gx); - v2 = image1d_ld8(b_blob_1d, 0); + if (psc(bw) == 1) + { + // type 6 + bx = 0; + by = 0; + bz = 0; + } } } } + + afpvec8 v1 = image3d_ld8(a_blob_3d, ivec3(ax, ay, az)); + afpvec8 v2 = image3d_ld8(b_blob_3d, ivec3(bx, by, bz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; @@ -349,18 +343,7 @@ void main() } #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st8(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st8(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/binaryop_pack4.comp b/src/layer/vulkan/shader/binaryop_pack4.comp index 9b25c197c..b19505520 100644 --- a/src/layer/vulkan/shader/binaryop_pack4.comp +++ b/src/layer/vulkan/shader/binaryop_pack4.comp @@ -45,14 +45,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D a_blob_1d; -layout (binding = 0) uniform unfp sampler2D a_blob_2d; layout (binding = 0) uniform unfp sampler3D a_blob_3d; -layout (binding = 1) uniform unfp sampler1D b_blob_1d; -layout (binding = 1) uniform unfp sampler2D b_blob_2d; layout (binding = 1) uniform unfp sampler3D b_blob_3d; -layout (binding = 2, imfmtc4) writeonly uniform unfp 
image1D top_blob_1d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer a_blob { sfpvec4 a_blob_data[]; }; @@ -91,20 +85,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v1; - - if (psc(outdims) == 1) - { - v1 = image1d_ld4(a_blob_1d, gx); - } - else if (psc(outdims) == 2) - { - v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy)); - } - else // if (psc(outdims) == 3) - { - v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; @@ -129,18 +110,7 @@ void main() if (op_type == 8) res = b / v1; #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st4(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st4(a_blob_data, gi, res); #endif @@ -149,20 +119,7 @@ void main() { // type 7 13 19 #if NCNN_image_shader - afpvec4 v2; - - if (psc(outdims) == 1) - { - v2 = image1d_ld4(b_blob_1d, gx); - } - else if (psc(outdims) == 2) - { - v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy)); - } - else // if (psc(outdims) == 3) - { - v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); #else afpvec4 v2 = buffer_ld4(b_blob_data, gi); #endif @@ -178,18 +135,7 @@ void main() if (op_type == 8) res = v2 / v1; #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st4(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st4(top_blob_data, gi, res); #endif 
diff --git a/src/layer/vulkan/shader/binaryop_pack8.comp b/src/layer/vulkan/shader/binaryop_pack8.comp index 453cc17bc..010187918 100644 --- a/src/layer/vulkan/shader/binaryop_pack8.comp +++ b/src/layer/vulkan/shader/binaryop_pack8.comp @@ -46,14 +46,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D a_blob_1d; -layout (binding = 0) uniform unfp sampler2D a_blob_2d; layout (binding = 0) uniform unfp sampler3D a_blob_3d; -layout (binding = 1) uniform unfp sampler1D b_blob_1d; -layout (binding = 1) uniform unfp sampler2D b_blob_2d; layout (binding = 1) uniform unfp sampler3D b_blob_3d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer a_blob { sfpvec8 a_blob_data[]; }; @@ -92,20 +86,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v1; - - if (psc(outdims) == 1) - { - v1 = image1d_ld8(a_blob_1d, gx); - } - else if (psc(outdims) == 2) - { - v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy)); - } - else // if (psc(outdims) == 3) - { - v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; @@ -166,18 +147,7 @@ void main() } #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st8(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st8(a_blob_data, gi, res); #endif @@ -186,20 +156,7 @@ void main() { // type 7 13 19 #if NCNN_image_shader - afpvec8 v2; - - if 
(psc(outdims) == 1) - { - v2 = image1d_ld8(b_blob_1d, gx); - } - else if (psc(outdims) == 2) - { - v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy)); - } - else // if (psc(outdims) == 3) - { - v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); #else afpvec8 v2 = buffer_ld8(b_blob_data, gi); #endif @@ -251,18 +208,7 @@ void main() } #if NCNN_image_shader - if (psc(outdims) == 1) - { - image1d_st8(top_blob_1d, gx, res); - } - else if (psc(outdims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(outdims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st8(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/cast_fp16_to_fp32.comp b/src/layer/vulkan/shader/cast_fp16_to_fp32.comp index 373c5ad90..d846e42d4 100644 --- a/src/layer/vulkan/shader/cast_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/cast_fp16_to_fp32.comp @@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, r32f) writeonly uniform highp image1D top_blob_1d; -layout (binding = 1, r32f) writeonly uniform highp image2D top_blob_2d; layout (binding = 1, r32f) writeonly uniform highp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -71,18 +67,7 @@ void main() return; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp1(top_blob_1d, gx, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - image3d_cp1(top_blob_3d, 
ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } + image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/cast_fp16_to_fp32_pack4.comp b/src/layer/vulkan/shader/cast_fp16_to_fp32_pack4.comp index c15096d52..555415514 100644 --- a/src/layer/vulkan/shader/cast_fp16_to_fp32_pack4.comp +++ b/src/layer/vulkan/shader/cast_fp16_to_fp32_pack4.comp @@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, rgba32f) writeonly uniform highp image1D top_blob_1d; -layout (binding = 1, rgba32f) writeonly uniform highp image2D top_blob_2d; layout (binding = 1, rgba32f) writeonly uniform highp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; @@ -71,18 +67,7 @@ void main() return; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp4(top_blob_1d, gx, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } + image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/cast_fp16_to_fp32_pack8.comp b/src/layer/vulkan/shader/cast_fp16_to_fp32_pack8.comp index 23ef342aa..302db2945 100644 --- a/src/layer/vulkan/shader/cast_fp16_to_fp32_pack8.comp +++ b/src/layer/vulkan/shader/cast_fp16_to_fp32_pack8.comp @@ -36,11 +36,7 @@ layout (constant_id = 
shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, rgba32f) writeonly uniform highp image1D top_blob_1d; -layout (binding = 1, rgba32f) writeonly uniform highp image2D top_blob_2d; layout (binding = 1, rgba32f) writeonly uniform highp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; @@ -72,18 +68,7 @@ void main() return; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp8(top_blob_1d, gx, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } + image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/cast_fp32_to_fp16.comp b/src/layer/vulkan/shader/cast_fp32_to_fp16.comp index fce73fc3c..7911de9e7 100644 --- a/src/layer/vulkan/shader/cast_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/cast_fp32_to_fp16.comp @@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform highp sampler1D bottom_blob_1d; -layout (binding = 0) uniform highp sampler2D bottom_blob_2d; layout (binding = 0) uniform highp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D 
top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { float bottom_blob_data[]; }; @@ -71,18 +67,7 @@ void main() return; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp1(top_blob_1d, gx, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } + image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/cast_fp32_to_fp16_pack4.comp b/src/layer/vulkan/shader/cast_fp32_to_fp16_pack4.comp index 56c2575c5..7e3e4fc7f 100644 --- a/src/layer/vulkan/shader/cast_fp32_to_fp16_pack4.comp +++ b/src/layer/vulkan/shader/cast_fp32_to_fp16_pack4.comp @@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform highp sampler1D bottom_blob_1d; -layout (binding = 0) uniform highp sampler2D bottom_blob_2d; layout (binding = 0) uniform highp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { vec4 bottom_blob_data[]; }; @@ -71,18 +67,7 @@ void main() return; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp4(top_blob_1d, gx, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } + image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), 
bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/cast_fp32_to_fp16_pack8.comp b/src/layer/vulkan/shader/cast_fp32_to_fp16_pack8.comp index 7ada1f149..69dfb475d 100644 --- a/src/layer/vulkan/shader/cast_fp32_to_fp16_pack8.comp +++ b/src/layer/vulkan/shader/cast_fp32_to_fp16_pack8.comp @@ -36,11 +36,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform highp sampler1D bottom_blob_1d; -layout (binding = 0) uniform highp sampler2D bottom_blob_2d; layout (binding = 0) uniform highp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { mat2x4 bottom_blob_data[]; }; @@ -72,18 +68,7 @@ void main() return; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp8(top_blob_1d, gx, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } + image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; diff --git a/src/layer/vulkan/shader/clip.comp b/src/layer/vulkan/shader/clip.comp index 2fcb9340a..10d1efe53 100644 --- a/src/layer/vulkan/shader/clip.comp +++ b/src/layer/vulkan/shader/clip.comp @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D 
bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; @@ -61,19 +57,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -83,18 +67,7 @@ void main() v = clamp(v, afp(const_min), afp(const_max)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/clip_pack4.comp b/src/layer/vulkan/shader/clip_pack4.comp index bf2b056a2..099cd2e90 100644 --- a/src/layer/vulkan/shader/clip_pack4.comp +++ b/src/layer/vulkan/shader/clip_pack4.comp @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, 
imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; @@ -61,19 +57,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -83,18 +67,7 @@ void main() v = clamp(v, afp(const_min), afp(const_max)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/clip_pack8.comp b/src/layer/vulkan/shader/clip_pack8.comp index e11bc3a46..9d81e7b4c 100644 --- a/src/layer/vulkan/shader/clip_pack8.comp +++ b/src/layer/vulkan/shader/clip_pack8.comp @@ -33,11 +33,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; @@ -62,19 
+58,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -85,18 +69,7 @@ void main() v[1] = clamp(v[1], afp(const_min), afp(const_max)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/concat.comp b/src/layer/vulkan/shader/concat.comp index bdb978e06..4636c330f 100644 --- a/src/layer/vulkan/shader/concat.comp +++ b/src/layer/vulkan/shader/concat.comp @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -74,29 +70,15 @@ void main() if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) return; -#if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp1(top_blob_1d, gx + p.offset, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - if 
(axis == 0) image2d_cp1(top_blob_2d, ivec2(gx, gy + p.offset), bottom_blob_2d, ivec2(gx, gy)); - if (axis == 1) image2d_cp1(top_blob_2d, ivec2(gx + p.offset, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - if (axis == 0) image3d_cp1(top_blob_3d, ivec3(gx, gy, gz + p.offset), bottom_blob_3d, ivec3(gx, gy, gz)); - if (axis == 1) image3d_cp1(top_blob_3d, ivec3(gx, gy + p.offset, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - if (axis == 2) image3d_cp1(top_blob_3d, ivec3(gx + p.offset, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } -#else - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); gxyz[psc(dims) - 1 - axis] += p.offset; +#if NCNN_image_shader + image3d_cp1(top_blob_3d, gxyz, bottom_blob_3d, ivec3(gx, gy, gz)); +#else + const int gi = gz * psc(cstep) + gy * psc(w) + gx; + int v_offset = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; buffer_cp1(top_blob_data, v_offset, bottom_blob_data, gi); diff --git a/src/layer/vulkan/shader/concat_pack4.comp b/src/layer/vulkan/shader/concat_pack4.comp index d7405a040..c2598f54e 100644 --- a/src/layer/vulkan/shader/concat_pack4.comp +++ b/src/layer/vulkan/shader/concat_pack4.comp @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; @@ -74,29 +70,15 @@ void main() if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) return; -#if 
NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp4(top_blob_1d, gx + p.offset, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - if (axis == 0) image2d_cp4(top_blob_2d, ivec2(gx, gy + p.offset), bottom_blob_2d, ivec2(gx, gy)); - if (axis == 1) image2d_cp4(top_blob_2d, ivec2(gx + p.offset, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - if (axis == 0) image3d_cp4(top_blob_3d, ivec3(gx, gy, gz + p.offset), bottom_blob_3d, ivec3(gx, gy, gz)); - if (axis == 1) image3d_cp4(top_blob_3d, ivec3(gx, gy + p.offset, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - if (axis == 2) image3d_cp4(top_blob_3d, ivec3(gx + p.offset, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } -#else - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); gxyz[psc(dims) - 1 - axis] += p.offset; +#if NCNN_image_shader + image3d_cp4(top_blob_3d, gxyz, bottom_blob_3d, ivec3(gx, gy, gz)); +#else + const int gi = gz * psc(cstep) + gy * psc(w) + gx; + int v_offset = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; buffer_cp4(top_blob_data, v_offset, bottom_blob_data, gi); diff --git a/src/layer/vulkan/shader/concat_pack4to1.comp b/src/layer/vulkan/shader/concat_pack4to1.comp index dbd6ebd6e..f0550e19c 100644 --- a/src/layer/vulkan/shader/concat_pack4to1.comp +++ b/src/layer/vulkan/shader/concat_pack4to1.comp @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly 
buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; @@ -74,81 +70,65 @@ void main() if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) return; + ivec3 gxyz = ivec3(gx, gy, gz); + + gxyz[psc(dims) - 1] *= 4; + gxyz[psc(dims) - 1 - axis] += p.offset; + #if NCNN_image_shader + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); + if (psc(dims) == 1) { - afpvec4 v = image1d_ld4(bottom_blob_1d, gx); - - int gx4 = gx * 4 + p.offset; - - image1d_st1(top_blob_1d, gx4 + 0, v.r); - image1d_st1(top_blob_1d, gx4 + 1, v.g); - image1d_st1(top_blob_1d, gx4 + 2, v.b); - image1d_st1(top_blob_1d, gx4 + 3, v.a); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, 0, 0), v.r); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, 0, 0), v.g); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, 0, 0), v.b); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, 0, 0), v.a); } else if (psc(dims) == 2) { - afpvec4 v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - if (axis == 0) { - int gy4 = gy * 4 + p.offset; - - image2d_st1(top_blob_2d, ivec2(gx, gy4 + 0), v.r); - image2d_st1(top_blob_2d, ivec2(gx, gy4 + 1), v.g); - image2d_st1(top_blob_2d, ivec2(gx, gy4 + 2), v.b); - image2d_st1(top_blob_2d, ivec2(gx, gy4 + 3), v.a); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0), v.r); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0), v.g); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, 0), v.b); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, 0), v.a); } if (axis == 1) { - int gx4 = gx * 4 + p.offset; - - image2d_st1(top_blob_2d, ivec2(gx4 + 0, gy), v.r); - image2d_st1(top_blob_2d, ivec2(gx4 + 1, gy), v.g); - image2d_st1(top_blob_2d, ivec2(gx4 + 2, gy), v.b); - image2d_st1(top_blob_2d, ivec2(gx4 + 3, gy), v.a); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0), v.r); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0), v.g); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, 0), v.b); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, 0), v.a); } } else // if (psc(dims) == 3) { - 
afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - if (axis == 0) { - int gz4 = gz * 4 + p.offset; - - image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 0), v.r); - image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 1), v.g); - image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 2), v.b); - image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 3), v.a); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0), v.r); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1), v.g); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 2), v.b); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 3), v.a); } if (axis == 1) { - int gy4 = gy * 4 + p.offset; - - image3d_st1(top_blob_3d, ivec3(gx, gy4 + 0, gz), v.r); - image3d_st1(top_blob_3d, ivec3(gx, gy4 + 1, gz), v.g); - image3d_st1(top_blob_3d, ivec3(gx, gy4 + 2, gz), v.b); - image3d_st1(top_blob_3d, ivec3(gx, gy4 + 3, gz), v.a); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z), v.r); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z), v.g); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, gxyz.z), v.b); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, gxyz.z), v.a); } if (axis == 2) { - int gx4 = gx * 4 + p.offset; - - image3d_st1(top_blob_3d, ivec3(gx4 + 0, gy, gz), v.r); - image3d_st1(top_blob_3d, ivec3(gx4 + 1, gy, gz), v.g); - image3d_st1(top_blob_3d, ivec3(gx4 + 2, gy, gz), v.b); - image3d_st1(top_blob_3d, ivec3(gx4 + 3, gy, gz), v.a); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z), v.r); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z), v.g); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, gxyz.z), v.b); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, gxyz.z), v.a); } } #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); - - gxyz[psc(dims) - 1] *= 4; - gxyz[psc(dims) - 1 - axis] += p.offset; - int v_offset_0 = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; ivec3 gxyz4 = ivec3(1, psc(outw), 
psc(outcstep)); diff --git a/src/layer/vulkan/shader/concat_pack8.comp b/src/layer/vulkan/shader/concat_pack8.comp index 97d059b17..fe6670b68 100644 --- a/src/layer/vulkan/shader/concat_pack8.comp +++ b/src/layer/vulkan/shader/concat_pack8.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; @@ -75,29 +71,15 @@ void main() if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) return; -#if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp8(top_blob_1d, gx + p.offset, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - if (axis == 0) image2d_cp8(top_blob_2d, ivec2(gx, gy + p.offset), bottom_blob_2d, ivec2(gx, gy)); - if (axis == 1) image2d_cp8(top_blob_2d, ivec2(gx + p.offset, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - if (axis == 0) image3d_cp8(top_blob_3d, ivec3(gx, gy, gz + p.offset), bottom_blob_3d, ivec3(gx, gy, gz)); - if (axis == 1) image3d_cp8(top_blob_3d, ivec3(gx, gy + p.offset, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - if (axis == 2) image3d_cp8(top_blob_3d, ivec3(gx + p.offset, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } -#else - const int gi = gz * psc(cstep) + gy * psc(w) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); gxyz[psc(dims) - 1 - axis] += p.offset; +#if NCNN_image_shader + image3d_cp8(top_blob_3d, gxyz, bottom_blob_3d, ivec3(gx, gy, gz)); +#else + const int gi = gz * psc(cstep) + 
gy * psc(w) + gx; + int v_offset = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; buffer_cp8(top_blob_data, v_offset, bottom_blob_data, gi); diff --git a/src/layer/vulkan/shader/concat_pack8to1.comp b/src/layer/vulkan/shader/concat_pack8to1.comp index 03a6ad53a..d63ed5f2c 100644 --- a/src/layer/vulkan/shader/concat_pack8to1.comp +++ b/src/layer/vulkan/shader/concat_pack8to1.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; @@ -75,105 +71,89 @@ void main() if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) return; + ivec3 gxyz = ivec3(gx, gy, gz); + + gxyz[psc(dims) - 1] *= 8; + gxyz[psc(dims) - 1 - axis] += p.offset; + #if NCNN_image_shader + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); + if (psc(dims) == 1) { - afpvec8 v = image1d_ld8(bottom_blob_1d, gx); - - int gx8 = gx * 8 + p.offset; - - image1d_st1(top_blob_1d, gx8 + 0, v[0].r); - image1d_st1(top_blob_1d, gx8 + 1, v[0].g); - image1d_st1(top_blob_1d, gx8 + 2, v[0].b); - image1d_st1(top_blob_1d, gx8 + 3, v[0].a); - image1d_st1(top_blob_1d, gx8 + 4, v[1].r); - image1d_st1(top_blob_1d, gx8 + 5, v[1].g); - image1d_st1(top_blob_1d, gx8 + 6, v[1].b); - image1d_st1(top_blob_1d, gx8 + 7, v[1].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, 0, 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, 0, 0), v[0].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, 0, 0), 
v[0].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, 0, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 4, 0, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 5, 0, 0), v[1].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 6, 0, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 7, 0, 0), v[1].a); } else if (psc(dims) == 2) { - afpvec8 v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - if (axis == 0) { - int gy8 = gy * 8 + p.offset; - - image2d_st1(top_blob_2d, ivec2(gx, gy8 + 0), v[0].r); - image2d_st1(top_blob_2d, ivec2(gx, gy8 + 1), v[0].g); - image2d_st1(top_blob_2d, ivec2(gx, gy8 + 2), v[0].b); - image2d_st1(top_blob_2d, ivec2(gx, gy8 + 3), v[0].a); - image2d_st1(top_blob_2d, ivec2(gx, gy8 + 4), v[1].r); - image2d_st1(top_blob_2d, ivec2(gx, gy8 + 5), v[1].g); - image2d_st1(top_blob_2d, ivec2(gx, gy8 + 6), v[1].b); - image2d_st1(top_blob_2d, ivec2(gx, gy8 + 7), v[1].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0), v[0].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, 0), v[0].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 4, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 5, 0), v[1].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 6, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 7, 0), v[1].a); } if (axis == 1) { - int gx8 = gx * 8 + p.offset; - - image2d_st1(top_blob_2d, ivec2(gx8 + 0, gy), v[0].r); - image2d_st1(top_blob_2d, ivec2(gx8 + 1, gy), v[0].g); - image2d_st1(top_blob_2d, ivec2(gx8 + 2, gy), v[0].b); - image2d_st1(top_blob_2d, ivec2(gx8 + 3, gy), v[0].a); - image2d_st1(top_blob_2d, ivec2(gx8 + 4, gy), v[1].r); - image2d_st1(top_blob_2d, ivec2(gx8 + 5, gy), v[1].g); - image2d_st1(top_blob_2d, ivec2(gx8 + 6, gy), v[1].b); - image2d_st1(top_blob_2d, ivec2(gx8 + 7, gy), v[1].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0), v[0].r); + 
image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0), v[0].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, 0), v[0].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 4, gxyz.y, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 5, gxyz.y, 0), v[1].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 6, gxyz.y, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 7, gxyz.y, 0), v[1].a); } } else // if (psc(dims) == 3) { - afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - if (axis == 0) { - int gz8 = gz * 8 + p.offset; - - image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 0), v[0].r); - image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 1), v[0].g); - image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 2), v[0].b); - image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 3), v[0].a); - image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 4), v[1].r); - image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 5), v[1].g); - image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 6), v[1].b); - image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 7), v[1].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1), v[0].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 2), v[0].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 3), v[0].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 4), v[1].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 5), v[1].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 6), v[1].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 7), v[1].a); } if (axis == 1) { - int gy8 = gy * 8 + p.offset; - - image3d_st1(top_blob_3d, ivec3(gx, gy8 + 0, gz), v[0].r); - image3d_st1(top_blob_3d, ivec3(gx, gy8 + 1, gz), v[0].g); - image3d_st1(top_blob_3d, ivec3(gx, gy8 + 2, gz), v[0].b); - image3d_st1(top_blob_3d, ivec3(gx, gy8 + 3, gz), v[0].a); - image3d_st1(top_blob_3d, ivec3(gx, gy8 + 4, gz), 
v[1].r); - image3d_st1(top_blob_3d, ivec3(gx, gy8 + 5, gz), v[1].g); - image3d_st1(top_blob_3d, ivec3(gx, gy8 + 6, gz), v[1].b); - image3d_st1(top_blob_3d, ivec3(gx, gy8 + 7, gz), v[1].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z), v[0].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z), v[0].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, gxyz.z), v[0].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, gxyz.z), v[0].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 4, gxyz.z), v[1].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 5, gxyz.z), v[1].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 6, gxyz.z), v[1].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 7, gxyz.z), v[1].a); } if (axis == 2) { - int gx8 = gx * 8 + p.offset; - - image3d_st1(top_blob_3d, ivec3(gx8 + 0, gy, gz), v[0].r); - image3d_st1(top_blob_3d, ivec3(gx8 + 1, gy, gz), v[0].g); - image3d_st1(top_blob_3d, ivec3(gx8 + 2, gy, gz), v[0].b); - image3d_st1(top_blob_3d, ivec3(gx8 + 3, gy, gz), v[0].a); - image3d_st1(top_blob_3d, ivec3(gx8 + 4, gy, gz), v[1].r); - image3d_st1(top_blob_3d, ivec3(gx8 + 5, gy, gz), v[1].g); - image3d_st1(top_blob_3d, ivec3(gx8 + 6, gy, gz), v[1].b); - image3d_st1(top_blob_3d, ivec3(gx8 + 7, gy, gz), v[1].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z), v[0].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z), v[0].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, gxyz.z), v[0].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, gxyz.z), v[0].a); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 4, gxyz.y, gxyz.z), v[1].r); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 5, gxyz.y, gxyz.z), v[1].g); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 6, gxyz.y, gxyz.z), v[1].b); + image3d_st1(top_blob_3d, ivec3(gxyz.x + 7, gxyz.y, gxyz.z), v[1].a); } } #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); - - gxyz[psc(dims) - 1] *= 8; - gxyz[psc(dims) 
- 1 - axis] += p.offset; - int v_offset_0 = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; ivec3 gxyz4 = ivec3(1, psc(outw), psc(outcstep)); diff --git a/src/layer/vulkan/shader/concat_pack8to4.comp b/src/layer/vulkan/shader/concat_pack8to4.comp index fbb64cbdd..61c23261a 100644 --- a/src/layer/vulkan/shader/concat_pack8to4.comp +++ b/src/layer/vulkan/shader/concat_pack8to4.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; @@ -75,70 +71,54 @@ void main() if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) return; + ivec3 gxyz = ivec3(gx, gy, gz); + + gxyz[psc(dims) - 1] *= 2; + gxyz[psc(dims) - 1 - axis] += p.offset; + #if NCNN_image_shader + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); + if (psc(dims) == 1) { - afpvec8 v = image1d_ld8(bottom_blob_1d, gx); - - int gx2 = gx * 2 + p.offset; - - image1d_st4(top_blob_1d, gx2 + 0, v[0]); - image1d_st4(top_blob_1d, gx2 + 1, v[1]); + image3d_st4(top_blob_3d, ivec3(gxyz.x + 0, 0, 0), v[0]); + image3d_st4(top_blob_3d, ivec3(gxyz.x + 1, 0, 0), v[1]); } else if (psc(dims) == 2) { - afpvec8 v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - if (axis == 0) { - int gy2 = gy * 2 + p.offset; - - image2d_st4(top_blob_2d, ivec2(gx, gy2 + 0), v[0]); - image2d_st4(top_blob_2d, ivec2(gx, gy2 + 1), v[1]); + image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0), v[0]); + 
image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0), v[1]); } if (axis == 1) { - int gx2 = gx * 2 + p.offset; - - image2d_st4(top_blob_2d, ivec2(gx2 + 0, gy), v[0]); - image2d_st4(top_blob_2d, ivec2(gx2 + 1, gy), v[1]); + image3d_st4(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0), v[0]); + image3d_st4(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0), v[1]); } } else // if (psc(dims) == 3) { - afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - if (axis == 0) { - int gz2 = gz * 2 + p.offset; - - image3d_st4(top_blob_3d, ivec3(gx, gy, gz2 + 0), v[0]); - image3d_st4(top_blob_3d, ivec3(gx, gy, gz2 + 1), v[1]); + image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0), v[0]); + image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1), v[1]); } if (axis == 1) { - int gy2 = gy * 2 + p.offset; - - image3d_st4(top_blob_3d, ivec3(gx, gy2 + 0, gz), v[0]); - image3d_st4(top_blob_3d, ivec3(gx, gy2 + 1, gz), v[1]); + image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z), v[0]); + image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z), v[1]); } if (axis == 2) { - int gx2 = gx * 2 + p.offset; - - image3d_st4(top_blob_3d, ivec3(gx2 + 0, gy, gz), v[0]); - image3d_st4(top_blob_3d, ivec3(gx2 + 1, gy, gz), v[1]); + image3d_st4(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z), v[0]); + image3d_st4(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z), v[1]); } } #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); - - gxyz[psc(dims) - 1] *= 2; - gxyz[psc(dims) - 1 - axis] += p.offset; - int v_offset_0 = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; ivec3 gxyz4 = ivec3(1, psc(outw), psc(outcstep)); diff --git a/src/layer/vulkan/shader/convolution.comp b/src/layer/vulkan/shader/convolution.comp index 9f89bffff..db5f31883 100644 --- a/src/layer/vulkan/shader/convolution.comp +++ b/src/layer/vulkan/shader/convolution.comp @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) 
uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -86,7 +86,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_1x1s1d1.comp index df5b28339..74b81959f 100644 --- a/src/layer/vulkan/shader/convolution_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_1x1s1d1.comp @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else #if NCNN_fp16_packed layout (binding = 0) readonly buffer bottom_blob { vec4 bottom_blob_data[]; }; @@ -100,7 +100,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = afpvec4(image1d_ld1(bias_blob, gz)); + sum = afpvec4(image3d_ld1(bias_blob, ivec3(gz, 0, 0))); #else sum = afpvec4(buffer_ld1(bias_data, gz)); #endif diff --git a/src/layer/vulkan/shader/convolution_pack1to4.comp b/src/layer/vulkan/shader/convolution_pack1to4.comp index 12e2e3478..68f09a714 100644 --- a/src/layer/vulkan/shader/convolution_pack1to4.comp +++ b/src/layer/vulkan/shader/convolution_pack1to4.comp @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 
0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -86,7 +86,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_pack1to8.comp b/src/layer/vulkan/shader/convolution_pack1to8.comp index c116a9870..957fff893 100644 --- a/src/layer/vulkan/shader/convolution_pack1to8.comp +++ b/src/layer/vulkan/shader/convolution_pack1to8.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_pack4.comp b/src/layer/vulkan/shader/convolution_pack4.comp index f1a4e9e57..bc79c11bd 100644 --- a/src/layer/vulkan/shader/convolution_pack4.comp +++ b/src/layer/vulkan/shader/convolution_pack4.comp @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int 
outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -91,7 +91,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp index 18995d0f4..362e5c43c 100644 --- a/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -103,7 +103,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - afpvec4 b = image1d_ld4(bias_blob, gz); + afpvec4 b = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else afpvec4 b = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp b/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp index c693e74a4..83afff616 100644 --- 
a/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp +++ b/src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp @@ -40,7 +40,7 @@ layout (constant_id = shape_constant_id_offset + 6) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D top_tm_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer top_tm_blob { sfpvec4 top_tm_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -129,7 +129,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - const afpvec4 bias_value = image1d_ld4(bias_blob, gz); + const afpvec4 bias_value = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else const afpvec4 bias_value = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_pack4to1.comp b/src/layer/vulkan/shader/convolution_pack4to1.comp index ecba9addd..25637a164 100644 --- a/src/layer/vulkan/shader/convolution_pack4to1.comp +++ b/src/layer/vulkan/shader/convolution_pack4to1.comp @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -86,7 +86,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git 
a/src/layer/vulkan/shader/convolution_pack4to8.comp b/src/layer/vulkan/shader/convolution_pack4to8.comp index 1d2650b85..bb3756a18 100644 --- a/src/layer/vulkan/shader/convolution_pack4to8.comp +++ b/src/layer/vulkan/shader/convolution_pack4to8.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_pack8.comp b/src/layer/vulkan/shader/convolution_pack8.comp index 613d6c055..6660645a6 100644 --- a/src/layer/vulkan/shader/convolution_pack8.comp +++ b/src/layer/vulkan/shader/convolution_pack8.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = 
buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp b/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp index f363d209b..7db4806ea 100644 --- a/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp +++ b/src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -99,7 +99,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - afpvec8 b = image1d_ld8(bias_blob, gz); + afpvec8 b = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else afpvec8 b = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp b/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp index f15f48b8e..9b9827707 100644 --- a/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp +++ b/src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp @@ -41,7 +41,7 @@ layout (constant_id = shape_constant_id_offset + 6) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D top_tm_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer top_tm_blob { sfpvec8 top_tm_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 
top_blob_data[]; }; @@ -130,7 +130,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - const afpvec8 bias_value = image1d_ld8(bias_blob, gz); + const afpvec8 bias_value = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else const afpvec8 bias_value = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_pack8to1.comp b/src/layer/vulkan/shader/convolution_pack8to1.comp index 4be219f92..b75a32bcf 100644 --- a/src/layer/vulkan/shader/convolution_pack8to1.comp +++ b/src/layer/vulkan/shader/convolution_pack8to1.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolution_pack8to4.comp b/src/layer/vulkan/shader/convolution_pack8to4.comp index 9109f558d..922ca5570 100644 --- a/src/layer/vulkan/shader/convolution_pack8to4.comp +++ b/src/layer/vulkan/shader/convolution_pack8to4.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly 
buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolutiondepthwise.comp b/src/layer/vulkan/shader/convolutiondepthwise.comp index 4f0451cfd..6159dcdfe 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise.comp @@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif @@ -108,7 +108,7 @@ void main() for (int x = 0; x < kernel_w; x++) { - sum += image2d_ld1(weight_blob, ivec2(wx, gz)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz)); + sum += image3d_ld1(weight_blob, ivec3(wx, gz, 0)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz)); sx += dilation_w; wx += 1; diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group.comp b/src/layer/vulkan/shader/convolutiondepthwise_group.comp index 32069bf5a..b59a42f85 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group.comp +++ 
b/src/layer/vulkan/shader/convolutiondepthwise_group.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp index a3e9eb2e5..6fdedca30 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp 
b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp index b32a6aa87..284e8eb21 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp index 2c9661fa6..beef63e4a 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -92,7 +92,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = 
image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp index 7871cccb1..56a1175c7 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp index f369a244d..eb1dbdaa2 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly 
buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp index abd16aed8..0c3c2934c 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp index c77771154..6cded579e 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) 
uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp index 9c9f43a89..54b55c25e 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp b/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp index 3ed798841..d3e6ceb69 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_pack4.comp @@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp 
image3D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif @@ -110,7 +110,7 @@ void main() { afpvec4 v = image3d_ld4(bottom_blob, ivec3(sx, sy, gz)); - afpvec4 k = image2d_ld4(weight_blob, ivec2(wx, gz)); + afpvec4 k = image3d_ld4(weight_blob, ivec3(wx, gz, 0)); sum += v * k; diff --git a/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp b/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp index 3e82b9bac..558051a91 100644 --- a/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp +++ b/src/layer/vulkan/shader/convolutiondepthwise_pack8.comp @@ -50,8 +50,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif @@ -111,7 +111,7 @@ void main() { 
afpvec8 v = image3d_ld8(bottom_blob, ivec3(sx, sy, gz)); - afpvec8 k = image2d_ld8(weight_blob, ivec2(wx, gz)); + afpvec8 k = image3d_ld8(weight_blob, ivec3(wx, gz, 0)); // sum += v * k; sum[0] += v[0] * k[0]; diff --git a/src/layer/vulkan/shader/deconvolution.comp b/src/layer/vulkan/shader/deconvolution.comp index 3843d56b7..c48034156 100644 --- a/src/layer/vulkan/shader/deconvolution.comp +++ b/src/layer/vulkan/shader/deconvolution.comp @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -86,7 +86,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolution_pack1to4.comp b/src/layer/vulkan/shader/deconvolution_pack1to4.comp index 4e9ecc6b6..0a56820d3 100644 --- a/src/layer/vulkan/shader/deconvolution_pack1to4.comp +++ b/src/layer/vulkan/shader/deconvolution_pack1to4.comp @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 
top_blob_data[]; }; @@ -86,7 +86,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolution_pack1to8.comp b/src/layer/vulkan/shader/deconvolution_pack1to8.comp index 1288e817c..f6227cd34 100644 --- a/src/layer/vulkan/shader/deconvolution_pack1to8.comp +++ b/src/layer/vulkan/shader/deconvolution_pack1to8.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolution_pack4.comp b/src/layer/vulkan/shader/deconvolution_pack4.comp index 72a9cf289..baed6081b 100644 --- a/src/layer/vulkan/shader/deconvolution_pack4.comp +++ b/src/layer/vulkan/shader/deconvolution_pack4.comp @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout 
(binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -91,7 +91,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolution_pack4to1.comp b/src/layer/vulkan/shader/deconvolution_pack4to1.comp index 190300f70..32bf34dd2 100644 --- a/src/layer/vulkan/shader/deconvolution_pack4to1.comp +++ b/src/layer/vulkan/shader/deconvolution_pack4to1.comp @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -86,7 +86,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolution_pack4to8.comp b/src/layer/vulkan/shader/deconvolution_pack4to8.comp index 0a0cb8905..a1eaf71a1 100644 --- a/src/layer/vulkan/shader/deconvolution_pack4to8.comp +++ b/src/layer/vulkan/shader/deconvolution_pack4to8.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly 
buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolution_pack8.comp b/src/layer/vulkan/shader/deconvolution_pack8.comp index d174b0d84..5e042f57d 100644 --- a/src/layer/vulkan/shader/deconvolution_pack8.comp +++ b/src/layer/vulkan/shader/deconvolution_pack8.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolution_pack8to1.comp b/src/layer/vulkan/shader/deconvolution_pack8to1.comp index bbf862f5c..64e8913c8 100644 --- a/src/layer/vulkan/shader/deconvolution_pack8to1.comp +++ b/src/layer/vulkan/shader/deconvolution_pack8to1.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp 
sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolution_pack8to4.comp b/src/layer/vulkan/shader/deconvolution_pack8to4.comp index 99d51af88..2039748ec 100644 --- a/src/layer/vulkan/shader/deconvolution_pack8to4.comp +++ b/src/layer/vulkan/shader/deconvolution_pack8to4.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise.comp b/src/layer/vulkan/shader/deconvolutiondepthwise.comp index a087f0847..0aebdf2ad 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise.comp @@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout 
(binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif @@ -124,7 +124,7 @@ void main() int wx = y * kernel_w + x; - sum += image2d_ld1(weight_blob, ivec2(wx, gz)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz)); + sum += image3d_ld1(weight_blob, ivec3(wx, gz, 0)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz)); } } #else diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group.comp index 26ec31ea4..4b2483fa0 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to4.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to4.comp index 
820780498..abdf31067 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to4.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to4.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to8.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to8.comp index 9aca2e791..ee20f73c9 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to8.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to8.comp @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = 
buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4.comp index fa29b5233..c345fac9b 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -92,7 +92,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to1.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to1.comp index dd1960390..2b76617da 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to1.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to1.comp @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ 
-87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to8.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to8.comp index 26b094322..8956b2216 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to8.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to8.comp @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8.comp index a56d228db..884ca96f8 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8.comp @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; 
#else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to1.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to1.comp index 9d9de0bfe..f68cd94a7 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to1.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to1.comp @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gz); + sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld1(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to4.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to4.comp index d127f8bcb..3df4ab3d4 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to4.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to4.comp @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D 
top_blob; layout (binding = 2) uniform unfp sampler3D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_pack4.comp b/src/layer/vulkan/shader/deconvolutiondepthwise_pack4.comp index 7323dc44b..4ae56eff3 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_pack4.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_pack4.comp @@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -87,7 +87,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gz); + sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld4(bias_data, gz); #endif @@ -126,7 +126,7 @@ void main() afpvec4 v = image3d_ld4(bottom_blob, ivec3(sx, sy, gz)); - afpvec4 k = image2d_ld4(weight_blob, ivec2(wx, gz)); + afpvec4 k = image3d_ld4(weight_blob, ivec3(wx, gz, 0)); sum += v * k; } diff --git a/src/layer/vulkan/shader/deconvolutiondepthwise_pack8.comp 
b/src/layer/vulkan/shader/deconvolutiondepthwise_pack8.comp index 47b0c3e24..c90f3d6c3 100644 --- a/src/layer/vulkan/shader/deconvolutiondepthwise_pack8.comp +++ b/src/layer/vulkan/shader/deconvolutiondepthwise_pack8.comp @@ -50,8 +50,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -88,7 +88,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gz); + sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); #else sum = buffer_ld8(bias_data, gz); #endif @@ -127,7 +127,7 @@ void main() afpvec8 v = image3d_ld8(bottom_blob, ivec3(sx, sy, gz)); - afpvec8 k = image2d_ld8(weight_blob, ivec2(wx, gz)); + afpvec8 k = image3d_ld8(weight_blob, ivec3(wx, gz, 0)); // sum += v * k; sum[0] += v[0] * k[0]; diff --git a/src/layer/vulkan/shader/deepcopy.comp b/src/layer/vulkan/shader/deepcopy.comp index d418cc7fe..22b2975ef 100644 --- a/src/layer/vulkan/shader/deepcopy.comp +++ b/src/layer/vulkan/shader/deepcopy.comp @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly 
uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -59,18 +55,7 @@ void main() return; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp1(top_blob_1d, gx, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } + image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; diff --git a/src/layer/vulkan/shader/deepcopy_pack4.comp b/src/layer/vulkan/shader/deepcopy_pack4.comp index 5696dd68b..074e4f76d 100644 --- a/src/layer/vulkan/shader/deepcopy_pack4.comp +++ b/src/layer/vulkan/shader/deepcopy_pack4.comp @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; @@ -59,18 +55,7 @@ void main() return; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp4(top_blob_1d, gx, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } + 
image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; diff --git a/src/layer/vulkan/shader/deepcopy_pack8.comp b/src/layer/vulkan/shader/deepcopy_pack8.comp index 72fec790d..9a36a8c8c 100644 --- a/src/layer/vulkan/shader/deepcopy_pack8.comp +++ b/src/layer/vulkan/shader/deepcopy_pack8.comp @@ -30,11 +30,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; @@ -60,18 +56,7 @@ void main() return; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp8(top_blob_1d, gx, bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); - } + image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; diff --git a/src/layer/vulkan/shader/dropout.comp b/src/layer/vulkan/shader/dropout.comp index 3d66198d2..a978f83c8 100644 --- a/src/layer/vulkan/shader/dropout.comp +++ b/src/layer/vulkan/shader/dropout.comp @@ -31,11 +31,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D 
bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; @@ -60,19 +56,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -82,18 +66,7 @@ void main() v *= afp(scale); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/dropout_pack4.comp b/src/layer/vulkan/shader/dropout_pack4.comp index 0879ebb56..6189c5167 100644 --- a/src/layer/vulkan/shader/dropout_pack4.comp +++ b/src/layer/vulkan/shader/dropout_pack4.comp @@ -31,11 +31,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly 
uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; @@ -60,19 +56,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -82,18 +66,7 @@ void main() v *= afp(scale); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/dropout_pack8.comp b/src/layer/vulkan/shader/dropout_pack8.comp index e73f214aa..d739b396c 100644 --- a/src/layer/vulkan/shader/dropout_pack8.comp +++ b/src/layer/vulkan/shader/dropout_pack8.comp @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; @@ -61,19 +57,7 @@ void main() return; #if 
NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -84,18 +68,7 @@ void main() v[1] = v[1] * afp(scale); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/eltwise.comp b/src/layer/vulkan/shader/eltwise.comp index 183a86a87..3705e9e9a 100644 --- a/src/layer/vulkan/shader/eltwise.comp +++ b/src/layer/vulkan/shader/eltwise.comp @@ -32,14 +32,8 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob1_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob1_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob1_3d; -layout (binding = 1) uniform unfp sampler1D bottom_blob2_1d; -layout (binding = 1) uniform unfp sampler2D bottom_blob2_2d; layout (binding = 1) uniform unfp sampler3D bottom_blob2_3d; -layout (binding = 2, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 2, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob1 { sfp bottom_blob1_data[]; }; @@ -69,23 +63,8 @@ void main() return; #if NCNN_image_shader - afp v1; - afp v2; - if (psc(dims) == 1) - { - v1 = 
image1d_ld1(bottom_blob1_1d, gx); - v2 = image1d_ld1(bottom_blob2_1d, gx); - } - else if (psc(dims) == 2) - { - v1 = image2d_ld1(bottom_blob1_2d, ivec2(gx, gy)); - v2 = image2d_ld1(bottom_blob2_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v1 = image3d_ld1(bottom_blob1_3d, ivec3(gx, gy, gz)); - v2 = image3d_ld1(bottom_blob2_3d, ivec3(gx, gy, gz)); - } + afp v1 = image3d_ld1(bottom_blob1_3d, ivec3(gx, gy, gz)); + afp v2 = image3d_ld1(bottom_blob2_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -119,18 +98,7 @@ void main() } #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, res); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st1(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/eltwise_pack4.comp b/src/layer/vulkan/shader/eltwise_pack4.comp index 7766ce8e2..32f8798e3 100644 --- a/src/layer/vulkan/shader/eltwise_pack4.comp +++ b/src/layer/vulkan/shader/eltwise_pack4.comp @@ -32,14 +32,8 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob1_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob1_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob1_3d; -layout (binding = 1) uniform unfp sampler1D bottom_blob2_1d; -layout (binding = 1) uniform unfp sampler2D bottom_blob2_2d; layout (binding = 1) uniform unfp sampler3D bottom_blob2_3d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer 
bottom_blob1 { sfpvec4 bottom_blob1_data[]; }; @@ -69,23 +63,8 @@ void main() return; #if NCNN_image_shader - afpvec4 v1; - afpvec4 v2; - if (psc(dims) == 1) - { - v1 = image1d_ld4(bottom_blob1_1d, gx); - v2 = image1d_ld4(bottom_blob2_1d, gx); - } - else if (psc(dims) == 2) - { - v1 = image2d_ld4(bottom_blob1_2d, ivec2(gx, gy)); - v2 = image2d_ld4(bottom_blob2_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v1 = image3d_ld4(bottom_blob1_3d, ivec3(gx, gy, gz)); - v2 = image3d_ld4(bottom_blob2_3d, ivec3(gx, gy, gz)); - } + afpvec4 v1 = image3d_ld4(bottom_blob1_3d, ivec3(gx, gy, gz)); + afpvec4 v2 = image3d_ld4(bottom_blob2_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -119,18 +98,7 @@ void main() } #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, res); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st4(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/eltwise_pack8.comp b/src/layer/vulkan/shader/eltwise_pack8.comp index c9677f615..0e93d41a3 100644 --- a/src/layer/vulkan/shader/eltwise_pack8.comp +++ b/src/layer/vulkan/shader/eltwise_pack8.comp @@ -33,14 +33,8 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob1_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob1_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob1_3d; -layout (binding = 1) uniform unfp sampler1D bottom_blob2_1d; -layout (binding = 1) uniform unfp sampler2D bottom_blob2_2d; layout (binding = 1) uniform unfp sampler3D bottom_blob2_3d; -layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 
2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob1 { sfpvec8 bottom_blob1_data[]; }; @@ -70,23 +64,8 @@ void main() return; #if NCNN_image_shader - afpvec8 v1; - afpvec8 v2; - if (psc(dims) == 1) - { - v1 = image1d_ld8(bottom_blob1_1d, gx); - v2 = image1d_ld8(bottom_blob2_1d, gx); - } - else if (psc(dims) == 2) - { - v1 = image2d_ld8(bottom_blob1_2d, ivec2(gx, gy)); - v2 = image2d_ld8(bottom_blob2_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v1 = image3d_ld8(bottom_blob1_3d, ivec3(gx, gy, gz)); - v2 = image3d_ld8(bottom_blob2_3d, ivec3(gx, gy, gz)); - } + afpvec8 v1 = image3d_ld8(bottom_blob1_3d, ivec3(gx, gy, gz)); + afpvec8 v2 = image3d_ld8(bottom_blob2_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -138,18 +117,7 @@ void main() } #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, res); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st8(top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/flatten.comp b/src/layer/vulkan/shader/flatten.comp index e5e06be85..7341346d5 100644 --- a/src/layer/vulkan/shader/flatten.comp +++ b/src/layer/vulkan/shader/flatten.comp @@ -35,9 +35,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob; +layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; #else layout (binding = 0) readonly buffer 
bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -78,14 +77,14 @@ void main() if (psc(dims) == 2) { - v = image2d_ld1(bottom_blob_2d, ivec2(x, y)); + v = image3d_ld1(bottom_blob_3d, ivec3(x, y, 0)); } else // if (psc(dims) == 3) { v = image3d_ld1(bottom_blob_3d, ivec3(x, y, z)); } - image1d_st1(top_blob, gx, v); + image3d_st1(top_blob, ivec3(gx, 0, 0), v); #else int v_offset = z * psc(cstep) + y * psc(w) + x; diff --git a/src/layer/vulkan/shader/flatten_pack1to4.comp b/src/layer/vulkan/shader/flatten_pack1to4.comp index 79e3edf95..3b0083938 100644 --- a/src/layer/vulkan/shader/flatten_pack1to4.comp +++ b/src/layer/vulkan/shader/flatten_pack1to4.comp @@ -35,9 +35,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -77,10 +76,10 @@ void main() ivec4 y4 = i4 / psc(w); ivec4 x4 = i4 % psc(w); - v.r = image2d_ld1(bottom_blob_2d, ivec2(x4.r, y4.r)); - v.g = image2d_ld1(bottom_blob_2d, ivec2(x4.g, y4.g)); - v.b = image2d_ld1(bottom_blob_2d, ivec2(x4.b, y4.b)); - v.a = image2d_ld1(bottom_blob_2d, ivec2(x4.a, y4.a)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, y4.r, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, y4.g, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, y4.b, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, 0)); } else // if (psc(dims) == 3) { @@ -96,7 +95,7 @@ void main() v.a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, 
z4.a)); } - image1d_st4(top_blob, gx, v); + image3d_st4(top_blob, ivec3(gx, 0, 0), v); #else ivec4 v_offset; diff --git a/src/layer/vulkan/shader/flatten_pack1to8.comp b/src/layer/vulkan/shader/flatten_pack1to8.comp index 7b38b2054..739319adb 100644 --- a/src/layer/vulkan/shader/flatten_pack1to8.comp +++ b/src/layer/vulkan/shader/flatten_pack1to8.comp @@ -36,9 +36,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -82,14 +81,14 @@ void main() ivec4 yy4 = ii4 / psc(w); ivec4 xx4 = ii4 % psc(w); - v[0].r = image2d_ld1(bottom_blob_2d, ivec2(x4.r, y4.r)); - v[0].g = image2d_ld1(bottom_blob_2d, ivec2(x4.g, y4.g)); - v[0].b = image2d_ld1(bottom_blob_2d, ivec2(x4.b, y4.b)); - v[0].a = image2d_ld1(bottom_blob_2d, ivec2(x4.a, y4.a)); - v[1].r = image2d_ld1(bottom_blob_2d, ivec2(xx4.r, yy4.r)); - v[1].g = image2d_ld1(bottom_blob_2d, ivec2(xx4.g, yy4.g)); - v[1].b = image2d_ld1(bottom_blob_2d, ivec2(xx4.b, yy4.b)); - v[1].a = image2d_ld1(bottom_blob_2d, ivec2(xx4.a, yy4.a)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, y4.r, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, y4.g, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, y4.b, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(xx4.r, yy4.r, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(xx4.g, yy4.g, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(xx4.b, yy4.b, 0)); + v[1].a = 
image3d_ld1(bottom_blob_3d, ivec3(xx4.a, yy4.a, 0)); } else // if (psc(dims) == 3) { @@ -113,7 +112,7 @@ void main() v[1].a = image3d_ld1(bottom_blob_3d, ivec3(xx4.a, yy4.a, zz4.a)); } - image1d_st8(top_blob, gx, v); + image3d_st8(top_blob, ivec3(gx, 0, 0), v); #else ivec4 v_offset; ivec4 vv_offset; diff --git a/src/layer/vulkan/shader/flatten_pack4.comp b/src/layer/vulkan/shader/flatten_pack4.comp index 8a33f107c..d15f89318 100644 --- a/src/layer/vulkan/shader/flatten_pack4.comp +++ b/src/layer/vulkan/shader/flatten_pack4.comp @@ -35,9 +35,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; #else #if NCNN_fp16_packed layout (binding = 0) readonly buffer bottom_blob { sfpvec2 bottom_blob_data[]; }; @@ -81,10 +80,10 @@ void main() ivec4 y4 = i4 / psc(w); ivec4 x4 = i4 % psc(w); - afpvec4 v0 = image2d_ld4(bottom_blob_2d, ivec2(x4.r, y4.r / 4)); - afpvec4 v1 = image2d_ld4(bottom_blob_2d, ivec2(x4.g, y4.g / 4)); - afpvec4 v2 = image2d_ld4(bottom_blob_2d, ivec2(x4.b, y4.b / 4)); - afpvec4 v3 = image2d_ld4(bottom_blob_2d, ivec2(x4.a, y4.a / 4)); + afpvec4 v0 = image3d_ld4(bottom_blob_3d, ivec3(x4.r, y4.r / 4, 0)); + afpvec4 v1 = image3d_ld4(bottom_blob_3d, ivec3(x4.g, y4.g / 4, 0)); + afpvec4 v2 = image3d_ld4(bottom_blob_3d, ivec3(x4.b, y4.b / 4, 0)); + afpvec4 v3 = image3d_ld4(bottom_blob_3d, ivec3(x4.a, y4.a / 4, 0)); v.r = v0[y4.r % 4]; v.g = v1[y4.g % 4]; @@ -110,7 +109,7 @@ void main() v.a = v3[z4.a % 4]; } - image1d_st4(top_blob, gx, v); + image3d_st4(top_blob, ivec3(gx, 0, 0), v); #else #if NCNN_fp16_packed ivec4 v_offset; diff --git a/src/layer/vulkan/shader/flatten_pack4to8.comp 
b/src/layer/vulkan/shader/flatten_pack4to8.comp index 2d2fe7dce..66ef75fd7 100644 --- a/src/layer/vulkan/shader/flatten_pack4to8.comp +++ b/src/layer/vulkan/shader/flatten_pack4to8.comp @@ -36,9 +36,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; #else #if NCNN_fp16_packed layout (binding = 0) readonly buffer bottom_blob { sfpvec2 bottom_blob_data[]; }; @@ -85,14 +84,14 @@ void main() ivec4 yy4 = ii4 / psc(w); ivec4 xx4 = ii4 % psc(w); - afpvec4 v0 = image2d_ld4(bottom_blob_2d, ivec2(x4.r, y4.r / 4)); - afpvec4 v1 = image2d_ld4(bottom_blob_2d, ivec2(x4.g, y4.g / 4)); - afpvec4 v2 = image2d_ld4(bottom_blob_2d, ivec2(x4.b, y4.b / 4)); - afpvec4 v3 = image2d_ld4(bottom_blob_2d, ivec2(x4.a, y4.a / 4)); - afpvec4 v4 = image2d_ld4(bottom_blob_2d, ivec2(xx4.r, yy4.r / 4)); - afpvec4 v5 = image2d_ld4(bottom_blob_2d, ivec2(xx4.g, yy4.g / 4)); - afpvec4 v6 = image2d_ld4(bottom_blob_2d, ivec2(xx4.b, yy4.b / 4)); - afpvec4 v7 = image2d_ld4(bottom_blob_2d, ivec2(xx4.a, yy4.a / 4)); + afpvec4 v0 = image3d_ld4(bottom_blob_3d, ivec3(x4.r, y4.r / 4, 0)); + afpvec4 v1 = image3d_ld4(bottom_blob_3d, ivec3(x4.g, y4.g / 4, 0)); + afpvec4 v2 = image3d_ld4(bottom_blob_3d, ivec3(x4.b, y4.b / 4, 0)); + afpvec4 v3 = image3d_ld4(bottom_blob_3d, ivec3(x4.a, y4.a / 4, 0)); + afpvec4 v4 = image3d_ld4(bottom_blob_3d, ivec3(xx4.r, yy4.r / 4, 0)); + afpvec4 v5 = image3d_ld4(bottom_blob_3d, ivec3(xx4.g, yy4.g / 4, 0)); + afpvec4 v6 = image3d_ld4(bottom_blob_3d, ivec3(xx4.b, yy4.b / 4, 0)); + afpvec4 v7 = image3d_ld4(bottom_blob_3d, ivec3(xx4.a, yy4.a / 4, 0)); v[0].r = v0[y4.r % 4]; v[0].g = v1[y4.g % 4]; @@ -133,7 
+132,7 @@ void main() v[1].a = v7[zz4.a % 4]; } - image1d_st8(top_blob, gx, v); + image3d_st8(top_blob, ivec3(gx, 0, 0), v); #else #if NCNN_fp16_packed ivec4 v_offset; diff --git a/src/layer/vulkan/shader/flatten_pack8.comp b/src/layer/vulkan/shader/flatten_pack8.comp index 309e9d528..5158fe19f 100644 --- a/src/layer/vulkan/shader/flatten_pack8.comp +++ b/src/layer/vulkan/shader/flatten_pack8.comp @@ -36,9 +36,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; #else #if NCNN_fp16_packed layout (binding = 0) readonly buffer bottom_blob { sfpvec2 bottom_blob_data[]; }; @@ -85,14 +84,14 @@ void main() ivec4 yy4 = ii4 / psc(w); ivec4 xx4 = ii4 % psc(w); - afpvec8 v0 = image2d_ld8(bottom_blob_2d, ivec2(x4.r, y4.r / 8)); - afpvec8 v1 = image2d_ld8(bottom_blob_2d, ivec2(x4.g, y4.g / 8)); - afpvec8 v2 = image2d_ld8(bottom_blob_2d, ivec2(x4.b, y4.b / 8)); - afpvec8 v3 = image2d_ld8(bottom_blob_2d, ivec2(x4.a, y4.a / 8)); - afpvec8 v4 = image2d_ld8(bottom_blob_2d, ivec2(xx4.r, yy4.r / 8)); - afpvec8 v5 = image2d_ld8(bottom_blob_2d, ivec2(xx4.g, yy4.g / 8)); - afpvec8 v6 = image2d_ld8(bottom_blob_2d, ivec2(xx4.b, yy4.b / 8)); - afpvec8 v7 = image2d_ld8(bottom_blob_2d, ivec2(xx4.a, yy4.a / 8)); + afpvec8 v0 = image3d_ld8(bottom_blob_3d, ivec3(x4.r, y4.r / 8, 0)); + afpvec8 v1 = image3d_ld8(bottom_blob_3d, ivec3(x4.g, y4.g / 8, 0)); + afpvec8 v2 = image3d_ld8(bottom_blob_3d, ivec3(x4.b, y4.b / 8, 0)); + afpvec8 v3 = image3d_ld8(bottom_blob_3d, ivec3(x4.a, y4.a / 8, 0)); + afpvec8 v4 = image3d_ld8(bottom_blob_3d, ivec3(xx4.r, yy4.r / 8, 0)); + afpvec8 v5 = image3d_ld8(bottom_blob_3d, ivec3(xx4.g, 
yy4.g / 8, 0)); + afpvec8 v6 = image3d_ld8(bottom_blob_3d, ivec3(xx4.b, yy4.b / 8, 0)); + afpvec8 v7 = image3d_ld8(bottom_blob_3d, ivec3(xx4.a, yy4.a / 8, 0)); v[0].r = v0[(y4.r % 8) / 4][y4.r % 4]; v[0].g = v1[(y4.g % 8) / 4][y4.g % 4]; @@ -133,7 +132,7 @@ void main() v[1].a = v7[(zz4.a % 8) / 4][zz4.a % 4]; } - image1d_st8(top_blob, gx, v); + image3d_st8(top_blob, ivec3(gx, 0, 0), v); #else #if NCNN_fp16_packed ivec4 v_offset; diff --git a/src/layer/vulkan/shader/hardsigmoid.comp b/src/layer/vulkan/shader/hardsigmoid.comp index 1f2b35835..9118be459 100644 --- a/src/layer/vulkan/shader/hardsigmoid.comp +++ b/src/layer/vulkan/shader/hardsigmoid.comp @@ -61,19 +61,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -83,18 +71,7 @@ void main() v = clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/hardsigmoid_pack4.comp b/src/layer/vulkan/shader/hardsigmoid_pack4.comp index 37a0e769d..e31d16099 100644 --- a/src/layer/vulkan/shader/hardsigmoid_pack4.comp +++ b/src/layer/vulkan/shader/hardsigmoid_pack4.comp @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) 
uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; @@ -61,19 +57,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -83,18 +67,7 @@ void main() v = clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/hardsigmoid_pack8.comp b/src/layer/vulkan/shader/hardsigmoid_pack8.comp index 749b43fca..597e84b68 100644 --- a/src/layer/vulkan/shader/hardsigmoid_pack8.comp +++ b/src/layer/vulkan/shader/hardsigmoid_pack8.comp @@ -33,11 +33,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, 
imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; @@ -62,19 +58,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -85,18 +69,7 @@ void main() v[1] = clamp(v[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/hardswish.comp b/src/layer/vulkan/shader/hardswish.comp index 253c50ccb..79829e3fa 100644 --- a/src/layer/vulkan/shader/hardswish.comp +++ b/src/layer/vulkan/shader/hardswish.comp @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else 
layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; @@ -61,19 +57,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -83,18 +67,7 @@ void main() v = v * clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/hardswish_pack4.comp b/src/layer/vulkan/shader/hardswish_pack4.comp index ca9164597..69cfa2388 100644 --- a/src/layer/vulkan/shader/hardswish_pack4.comp +++ b/src/layer/vulkan/shader/hardswish_pack4.comp @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; @@ -61,19 +57,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = 
image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -83,18 +67,7 @@ void main() v = v * clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/hardswish_pack8.comp b/src/layer/vulkan/shader/hardswish_pack8.comp index c35e6c1e7..9dfabd8df 100644 --- a/src/layer/vulkan/shader/hardswish_pack8.comp +++ b/src/layer/vulkan/shader/hardswish_pack8.comp @@ -33,11 +33,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; @@ -62,19 +58,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, 
ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -85,18 +69,7 @@ void main() v[1] = v[1] * clamp(v[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/innerproduct.comp b/src/layer/vulkan/shader/innerproduct.comp index f29ada86d..58c49d1c3 100644 --- a/src/layer/vulkan/shader/innerproduct.comp +++ b/src/layer/vulkan/shader/innerproduct.comp @@ -40,10 +40,10 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 0) uniform unfp sampler3D bottom_blob; +layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -80,7 +80,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gx); + sum = image3d_ld1(bias_blob, ivec3(gx, 0, 0)); #else sum = buffer_ld1(bias_data, gx); #endif @@ -93,7 +93,7 @@ void main() #if NCNN_image_shader for (int i = 0; i < psc(w); i++) { - sum += image2d_ld1(weight_blob, 
ivec2(i, gx)) * image1d_ld1(bottom_blob, i); + sum += image3d_ld1(weight_blob, ivec3(i, gx, 0)) * image3d_ld1(bottom_blob, ivec3(i, 0, 0)); } #else int w_offset = gx * psc(w); @@ -129,7 +129,7 @@ void main() } #if NCNN_image_shader - image1d_st1(top_blob, gx, sum); + image3d_st1(top_blob, ivec3(gx, 0, 0), sum); #else buffer_st1(top_blob_data, gx, sum); #endif diff --git a/src/layer/vulkan/shader/innerproduct_pack1to4.comp b/src/layer/vulkan/shader/innerproduct_pack1to4.comp index d69408d6f..d28eef207 100644 --- a/src/layer/vulkan/shader/innerproduct_pack1to4.comp +++ b/src/layer/vulkan/shader/innerproduct_pack1to4.comp @@ -40,10 +40,10 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 0) uniform unfp sampler3D bottom_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -80,7 +80,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gx); + sum = image3d_ld4(bias_blob, ivec3(gx, 0, 0)); #else sum = buffer_ld4(bias_data, gx); #endif @@ -93,9 +93,9 @@ void main() #if NCNN_image_shader for (int i = 0; i < psc(w); i++) { - afp v = image1d_ld1(bottom_blob, i); + afp v = image3d_ld1(bottom_blob, ivec3(i, 0, 0)); - afpvec4 k = image2d_ld4(weight_blob, ivec2(i, gx)); + afpvec4 k = image3d_ld4(weight_blob, ivec3(i, gx, 0)); sum += v * k; } @@ -137,7 +137,7 @@ 
void main() } #if NCNN_image_shader - image1d_st4(top_blob, gx, sum); + image3d_st4(top_blob, ivec3(gx, 0, 0), sum); #else buffer_st4(top_blob_data, gx, sum); #endif diff --git a/src/layer/vulkan/shader/innerproduct_pack1to8.comp b/src/layer/vulkan/shader/innerproduct_pack1to8.comp index 2b70f59cb..ec6a75499 100644 --- a/src/layer/vulkan/shader/innerproduct_pack1to8.comp +++ b/src/layer/vulkan/shader/innerproduct_pack1to8.comp @@ -41,10 +41,10 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 0) uniform unfp sampler3D bottom_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -81,7 +81,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gx); + sum = image3d_ld8(bias_blob, ivec3(gx, 0, 0)); #else sum = buffer_ld8(bias_data, gx); #endif @@ -96,9 +96,9 @@ void main() for (int i = 0; i < psc(w); i++) { - afp v = image1d_ld1(bottom_blob, i); + afp v = image3d_ld1(bottom_blob, ivec3(i, 0, 0)); - afpvec8 k = image2d_ld8(weight_blob, ivec2(i, gx)); + afpvec8 k = image3d_ld8(weight_blob, ivec3(i, gx, 0)); // sum += v * k; sum[0] += v * k[0]; @@ -149,7 +149,7 @@ void main() } #if NCNN_image_shader - image1d_st8(top_blob, gx, sum); + image3d_st8(top_blob, ivec3(gx, 0, 0), sum); #else buffer_st8(top_blob_data, gx, sum); #endif diff --git 
a/src/layer/vulkan/shader/innerproduct_pack4.comp b/src/layer/vulkan/shader/innerproduct_pack4.comp index bb3bad20d..d611d578a 100644 --- a/src/layer/vulkan/shader/innerproduct_pack4.comp +++ b/src/layer/vulkan/shader/innerproduct_pack4.comp @@ -40,10 +40,10 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 0) uniform unfp sampler3D bottom_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -85,7 +85,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gx); + sum = image3d_ld4(bias_blob, ivec3(gx, 0, 0)); #else sum = buffer_ld4(bias_data, gx); #endif @@ -100,12 +100,12 @@ void main() for (int i = 0; i < psc(w); i++) { - afpvec4 v = image1d_ld4(bottom_blob, i); + afpvec4 v = image3d_ld4(bottom_blob, ivec3(i, 0, 0)); afpmat4 k = afpmat4( - image2d_ld4(weight_blob, ivec2(wx + 0, gx)), - image2d_ld4(weight_blob, ivec2(wx + 1, gx)), - image2d_ld4(weight_blob, ivec2(wx + 2, gx)), - image2d_ld4(weight_blob, ivec2(wx + 3, gx)) + image3d_ld4(weight_blob, ivec3(wx + 0, gx, 0)), + image3d_ld4(weight_blob, ivec3(wx + 1, gx, 0)), + image3d_ld4(weight_blob, ivec3(wx + 2, gx, 0)), + image3d_ld4(weight_blob, ivec3(wx + 3, gx, 0)) ); sum += v * k; @@ -160,7 +160,7 @@ void main() } #if NCNN_image_shader - image1d_st4(top_blob, gx, sum); + image3d_st4(top_blob, 
ivec3(gx, 0, 0), sum); #else buffer_st4(top_blob_data, gx, sum); #endif diff --git a/src/layer/vulkan/shader/innerproduct_pack4to1.comp b/src/layer/vulkan/shader/innerproduct_pack4to1.comp index cda6517eb..8d13968c8 100644 --- a/src/layer/vulkan/shader/innerproduct_pack4to1.comp +++ b/src/layer/vulkan/shader/innerproduct_pack4to1.comp @@ -40,10 +40,10 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 0) uniform unfp sampler3D bottom_blob; +layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -80,7 +80,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gx); + sum = image3d_ld1(bias_blob, ivec3(gx, 0, 0)); #else sum = buffer_ld1(bias_data, gx); #endif @@ -93,9 +93,9 @@ void main() #if NCNN_image_shader for (int i = 0; i < psc(w); i++) { - afpvec4 v = image1d_ld4(bottom_blob, i); + afpvec4 v = image3d_ld4(bottom_blob, ivec3(i, 0, 0)); - afpvec4 k = image2d_ld4(weight_blob, ivec2(i, gx)); + afpvec4 k = image3d_ld4(weight_blob, ivec3(i, gx, 0)); sum += dot(v, k); } @@ -137,7 +137,7 @@ void main() } #if NCNN_image_shader - image1d_st1(top_blob, gx, sum); + image3d_st1(top_blob, ivec3(gx, 0, 0), sum); #else buffer_st1(top_blob_data, gx, sum); #endif diff --git a/src/layer/vulkan/shader/innerproduct_pack4to8.comp 
b/src/layer/vulkan/shader/innerproduct_pack4to8.comp index da1de61d6..3ef83d1ea 100644 --- a/src/layer/vulkan/shader/innerproduct_pack4to8.comp +++ b/src/layer/vulkan/shader/innerproduct_pack4to8.comp @@ -41,10 +41,10 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 0) uniform unfp sampler3D bottom_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -81,7 +81,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gx); + sum = image3d_ld8(bias_blob, ivec3(gx, 0, 0)); #else sum = buffer_ld8(bias_data, gx); #endif @@ -96,16 +96,16 @@ void main() for (int i = 0; i < psc(w); i++) { - afpvec4 v = image1d_ld4(bottom_blob, i); + afpvec4 v = image3d_ld4(bottom_blob, ivec3(i, 0, 0)); - afpvec4 k0 = image2d_ld4(weight_blob, ivec2(wx + 0, gx)); - afpvec4 k1 = image2d_ld4(weight_blob, ivec2(wx + 1, gx)); - afpvec4 k2 = image2d_ld4(weight_blob, ivec2(wx + 2, gx)); - afpvec4 k3 = image2d_ld4(weight_blob, ivec2(wx + 3, gx)); - afpvec4 k4 = image2d_ld4(weight_blob, ivec2(wx + 4, gx)); - afpvec4 k5 = image2d_ld4(weight_blob, ivec2(wx + 5, gx)); - afpvec4 k6 = image2d_ld4(weight_blob, ivec2(wx + 6, gx)); - afpvec4 k7 = image2d_ld4(weight_blob, ivec2(wx + 7, gx)); + afpvec4 k0 = image3d_ld4(weight_blob, ivec3(wx + 0, gx, 0)); + afpvec4 k1 = image3d_ld4(weight_blob, 
ivec3(wx + 1, gx, 0)); + afpvec4 k2 = image3d_ld4(weight_blob, ivec3(wx + 2, gx, 0)); + afpvec4 k3 = image3d_ld4(weight_blob, ivec3(wx + 3, gx, 0)); + afpvec4 k4 = image3d_ld4(weight_blob, ivec3(wx + 4, gx, 0)); + afpvec4 k5 = image3d_ld4(weight_blob, ivec3(wx + 5, gx, 0)); + afpvec4 k6 = image3d_ld4(weight_blob, ivec3(wx + 6, gx, 0)); + afpvec4 k7 = image3d_ld4(weight_blob, ivec3(wx + 7, gx, 0)); // sum += v * k; sum[0].r += dot(v, k0); @@ -177,7 +177,7 @@ void main() } #if NCNN_image_shader - image1d_st8(top_blob, gx, sum); + image3d_st8(top_blob, ivec3(gx, 0, 0), sum); #else buffer_st8(top_blob_data, gx, sum); #endif diff --git a/src/layer/vulkan/shader/innerproduct_pack8.comp b/src/layer/vulkan/shader/innerproduct_pack8.comp index 56ee2490e..605cd07d5 100644 --- a/src/layer/vulkan/shader/innerproduct_pack8.comp +++ b/src/layer/vulkan/shader/innerproduct_pack8.comp @@ -41,10 +41,10 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 0) uniform unfp sampler3D bottom_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -81,7 +81,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld8(bias_blob, gx); + sum = image3d_ld8(bias_blob, ivec3(gx, 0, 0)); #else sum = buffer_ld8(bias_data, gx); #endif @@ -96,16 +96,16 @@ void main() for (int i = 0; i < psc(w); i++) { - 
afpvec8 v = image1d_ld8(bottom_blob, i); + afpvec8 v = image3d_ld8(bottom_blob, ivec3(i, 0, 0)); - afpvec8 k0 = image2d_ld8(weight_blob, ivec2(wx + 0, gx)); - afpvec8 k1 = image2d_ld8(weight_blob, ivec2(wx + 1, gx)); - afpvec8 k2 = image2d_ld8(weight_blob, ivec2(wx + 2, gx)); - afpvec8 k3 = image2d_ld8(weight_blob, ivec2(wx + 3, gx)); - afpvec8 k4 = image2d_ld8(weight_blob, ivec2(wx + 4, gx)); - afpvec8 k5 = image2d_ld8(weight_blob, ivec2(wx + 5, gx)); - afpvec8 k6 = image2d_ld8(weight_blob, ivec2(wx + 6, gx)); - afpvec8 k7 = image2d_ld8(weight_blob, ivec2(wx + 7, gx)); + afpvec8 k0 = image3d_ld8(weight_blob, ivec3(wx + 0, gx, 0)); + afpvec8 k1 = image3d_ld8(weight_blob, ivec3(wx + 1, gx, 0)); + afpvec8 k2 = image3d_ld8(weight_blob, ivec3(wx + 2, gx, 0)); + afpvec8 k3 = image3d_ld8(weight_blob, ivec3(wx + 3, gx, 0)); + afpvec8 k4 = image3d_ld8(weight_blob, ivec3(wx + 4, gx, 0)); + afpvec8 k5 = image3d_ld8(weight_blob, ivec3(wx + 5, gx, 0)); + afpvec8 k6 = image3d_ld8(weight_blob, ivec3(wx + 6, gx, 0)); + afpvec8 k7 = image3d_ld8(weight_blob, ivec3(wx + 7, gx, 0)); // sum += v * k sum[0].r += dot(v[0], k0[0]) + dot(v[1], k0[1]); @@ -177,7 +177,7 @@ void main() } #if NCNN_image_shader - image1d_st8(top_blob, gx, sum); + image3d_st8(top_blob, ivec3(gx, 0, 0), sum); #else buffer_st8(top_blob_data, gx, sum); #endif diff --git a/src/layer/vulkan/shader/innerproduct_pack8to1.comp b/src/layer/vulkan/shader/innerproduct_pack8to1.comp index 82462bada..8dff04ac2 100644 --- a/src/layer/vulkan/shader/innerproduct_pack8to1.comp +++ b/src/layer/vulkan/shader/innerproduct_pack8to1.comp @@ -41,10 +41,10 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) 
uniform unfp sampler1D bias_blob; +layout (binding = 0) uniform unfp sampler3D bottom_blob; +layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -81,7 +81,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld1(bias_blob, gx); + sum = image3d_ld1(bias_blob, ivec3(gx, 0, 0)); #else sum = buffer_ld1(bias_data, gx); #endif @@ -94,9 +94,9 @@ void main() #if NCNN_image_shader for (int i = 0; i < psc(w); i++) { - afpvec8 v = image1d_ld8(bottom_blob, i); + afpvec8 v = image3d_ld8(bottom_blob, ivec3(i, 0, 0)); - afpvec8 k = image2d_ld8(weight_blob, ivec2(i, gx)); + afpvec8 k = image3d_ld8(weight_blob, ivec3(i, gx, 0)); // sum += dot(v, k); sum += dot(v[0], k[0]) + dot(v[1], k[1]); @@ -140,7 +140,7 @@ void main() } #if NCNN_image_shader - image1d_st1(top_blob, gx, sum); + image3d_st1(top_blob, ivec3(gx, 0, 0), sum); #else buffer_st1(top_blob_data, gx, sum); #endif diff --git a/src/layer/vulkan/shader/innerproduct_pack8to4.comp b/src/layer/vulkan/shader/innerproduct_pack8to4.comp index 2f9b03ca7..a5fd35bcd 100644 --- a/src/layer/vulkan/shader/innerproduct_pack8to4.comp +++ b/src/layer/vulkan/shader/innerproduct_pack8to4.comp @@ -41,10 +41,10 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; -layout (binding = 2) uniform unfp sampler2D weight_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 0) uniform unfp sampler3D bottom_blob; +layout (binding = 1, imfmtc4) writeonly uniform 
unfp image3D top_blob; +layout (binding = 2) uniform unfp sampler3D weight_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -81,7 +81,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - sum = image1d_ld4(bias_blob, gx); + sum = image3d_ld4(bias_blob, ivec3(gx, 0, 0)); #else sum = buffer_ld4(bias_data, gx); #endif @@ -96,12 +96,12 @@ void main() for (int i = 0; i < psc(w); i++) { - afpvec8 v = image1d_ld8(bottom_blob, i); + afpvec8 v = image3d_ld8(bottom_blob, ivec3(i, 0, 0)); - afpvec8 k0 = image2d_ld8(weight_blob, ivec2(wx + 0, gx)); - afpvec8 k1 = image2d_ld8(weight_blob, ivec2(wx + 1, gx)); - afpvec8 k2 = image2d_ld8(weight_blob, ivec2(wx + 2, gx)); - afpvec8 k3 = image2d_ld8(weight_blob, ivec2(wx + 3, gx)); + afpvec8 k0 = image3d_ld8(weight_blob, ivec3(wx + 0, gx, 0)); + afpvec8 k1 = image3d_ld8(weight_blob, ivec3(wx + 1, gx, 0)); + afpvec8 k2 = image3d_ld8(weight_blob, ivec3(wx + 2, gx, 0)); + afpvec8 k3 = image3d_ld8(weight_blob, ivec3(wx + 3, gx, 0)); // sum += v * k sum.r += dot(v[0], k0[0]) + dot(v[1], k0[1]); @@ -156,7 +156,7 @@ void main() } #if NCNN_image_shader - image1d_st4(top_blob, gx, sum); + image3d_st4(top_blob, ivec3(gx, 0, 0), sum); #else buffer_st4(top_blob_data, gx, sum); #endif diff --git a/src/layer/vulkan/shader/instancenorm_coeffs.comp b/src/layer/vulkan/shader/instancenorm_coeffs.comp index 5a74fa455..4f193f1d1 100644 --- a/src/layer/vulkan/shader/instancenorm_coeffs.comp +++ b/src/layer/vulkan/shader/instancenorm_coeffs.comp @@ -26,11 +26,11 @@ layout (constant_id = 1) const int affine = 0; layout (constant_id = 2) const int w = 0; #if NCNN_image_shader -layout (binding = 0, imfmtc1) writeonly uniform unfp image1D coeffs_blob; -layout (binding = 1) uniform highp sampler1D mean_blob; -layout (binding = 2) uniform highp sampler1D var_blob; -layout 
(binding = 3) uniform unfp sampler1D gamma_blob; -layout (binding = 4) uniform unfp sampler1D beta_blob; +layout (binding = 0, imfmtc1) writeonly uniform unfp image3D coeffs_blob; +layout (binding = 1) uniform highp sampler3D mean_blob; +layout (binding = 2) uniform highp sampler3D var_blob; +layout (binding = 3) uniform unfp sampler3D gamma_blob; +layout (binding = 4) uniform unfp sampler3D beta_blob; #else layout (binding = 0) writeonly buffer coeffs_blob { sfp coeffs_blob_data[]; }; layout (binding = 1) readonly buffer mean_blob { float mean_data[]; }; @@ -54,8 +54,8 @@ void main() return; #if NCNN_image_shader - float mean = texelFetch(mean_blob, gx, 0).r; - float var = texelFetch(var_blob, gx, 0).r; + float mean = texelFetch(mean_blob, ivec3(gx, 0, 0), 0).r; + float var = texelFetch(var_blob, ivec3(gx, 0, 0), 0).r; #else float mean = mean_data[gx]; float var = var_data[gx]; @@ -71,8 +71,8 @@ void main() else { #if NCNN_image_shader - float gamma = float(image1d_ld1(gamma_blob, gx)); - float beta = float(image1d_ld1(beta_blob, gx)); + float gamma = float(image3d_ld1(gamma_blob, ivec3(gx, 0, 0))); + float beta = float(image3d_ld1(beta_blob, ivec3(gx, 0, 0))); #else float gamma = float(buffer_ld1(gamma_data, gx)); float beta = float(buffer_ld1(beta_data, gx)); @@ -83,8 +83,8 @@ void main() } #if NCNN_image_shader - imageStore(coeffs_blob, gx*2, vec4(a)); - imageStore(coeffs_blob, gx*2 +1, vec4(b)); + imageStore(coeffs_blob, ivec3(gx*2, 0, 0), vec4(a)); + imageStore(coeffs_blob, ivec3(gx*2 +1, 0, 0), vec4(b)); #else buffer_st1(coeffs_blob_data, gx*2, afp(a)); buffer_st1(coeffs_blob_data, gx*2 +1, afp(b)); diff --git a/src/layer/vulkan/shader/instancenorm_coeffs_pack4.comp b/src/layer/vulkan/shader/instancenorm_coeffs_pack4.comp index 96769f4c9..4c385c68b 100644 --- a/src/layer/vulkan/shader/instancenorm_coeffs_pack4.comp +++ b/src/layer/vulkan/shader/instancenorm_coeffs_pack4.comp @@ -26,11 +26,11 @@ layout (constant_id = 1) const int affine = 0; layout 
(constant_id = 2) const int w = 0; #if NCNN_image_shader -layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob; -layout (binding = 1) uniform highp sampler1D mean_blob; -layout (binding = 2) uniform highp sampler1D var_blob; -layout (binding = 3) uniform unfp sampler1D gamma_blob; -layout (binding = 4) uniform unfp sampler1D beta_blob; +layout (binding = 0, imfmtc4) writeonly uniform unfp image3D coeffs_blob; +layout (binding = 1) uniform highp sampler3D mean_blob; +layout (binding = 2) uniform highp sampler3D var_blob; +layout (binding = 3) uniform unfp sampler3D gamma_blob; +layout (binding = 4) uniform unfp sampler3D beta_blob; #else layout (binding = 0) writeonly buffer coeffs_blob { sfpvec4 coeffs_blob_data[]; }; layout (binding = 1) readonly buffer mean_blob { vec4 mean_data[]; }; @@ -54,8 +54,8 @@ void main() return; #if NCNN_image_shader - vec4 mean = texelFetch(mean_blob, gx, 0); - vec4 var = texelFetch(var_blob, gx, 0); + vec4 mean = texelFetch(mean_blob, ivec3(gx, 0, 0), 0); + vec4 var = texelFetch(var_blob, ivec3(gx, 0, 0), 0); #else vec4 mean = mean_data[gx]; vec4 var = var_data[gx]; @@ -71,8 +71,8 @@ void main() else { #if NCNN_image_shader - vec4 gamma = vec4(image1d_ld4(gamma_blob, gx)); - vec4 beta = vec4(image1d_ld4(beta_blob, gx)); + vec4 gamma = vec4(image3d_ld4(gamma_blob, ivec3(gx, 0, 0))); + vec4 beta = vec4(image3d_ld4(beta_blob, ivec3(gx, 0, 0))); #else vec4 gamma = vec4(buffer_ld4(gamma_data, gx)); vec4 beta = vec4(buffer_ld4(beta_data, gx)); @@ -83,8 +83,8 @@ void main() } #if NCNN_image_shader - imageStore(coeffs_blob, gx*2, a); - imageStore(coeffs_blob, gx*2 +1, b); + imageStore(coeffs_blob, ivec3(gx*2, 0, 0), a); + imageStore(coeffs_blob, ivec3(gx*2 +1, 0, 0), b); #else buffer_st4(coeffs_blob_data, gx*2, afpvec4(a)); buffer_st4(coeffs_blob_data, gx*2 +1, afpvec4(b)); diff --git a/src/layer/vulkan/shader/instancenorm_coeffs_pack8.comp b/src/layer/vulkan/shader/instancenorm_coeffs_pack8.comp index ed1658c66..cd7a10e58 
100644 --- a/src/layer/vulkan/shader/instancenorm_coeffs_pack8.comp +++ b/src/layer/vulkan/shader/instancenorm_coeffs_pack8.comp @@ -27,11 +27,11 @@ layout (constant_id = 1) const int affine = 0; layout (constant_id = 2) const int w = 0; #if NCNN_image_shader -layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob; -layout (binding = 1) uniform highp sampler1D mean_blob; -layout (binding = 2) uniform highp sampler1D var_blob; -layout (binding = 3) uniform unfp sampler1D gamma_blob; -layout (binding = 4) uniform unfp sampler1D beta_blob; +layout (binding = 0, imfmtc4) writeonly uniform unfp image3D coeffs_blob; +layout (binding = 1) uniform highp sampler3D mean_blob; +layout (binding = 2) uniform highp sampler3D var_blob; +layout (binding = 3) uniform unfp sampler3D gamma_blob; +layout (binding = 4) uniform unfp sampler3D beta_blob; #else layout (binding = 0) writeonly buffer coeffs_blob { sfpvec8 coeffs_blob_data[]; }; layout (binding = 1) readonly buffer mean_blob { mat2x4 mean_data[]; }; @@ -55,8 +55,8 @@ void main() return; #if NCNN_image_shader - mat2x4 mean = mat2x4(texelFetch(mean_blob, gx * 2, 0), texelFetch(mean_blob, gx * 2 + 1, 0)); - mat2x4 var = mat2x4(texelFetch(var_blob, gx * 2, 0), texelFetch(var_blob, gx * 2 + 1, 0)); + mat2x4 mean = mat2x4(texelFetch(mean_blob, ivec3(gx * 2, 0, 0), 0), texelFetch(mean_blob, ivec3(gx * 2 + 1, 0, 0), 0)); + mat2x4 var = mat2x4(texelFetch(var_blob, ivec3(gx * 2, 0, 0), 0), texelFetch(var_blob, ivec3(gx * 2 + 1, 0, 0), 0)); #else mat2x4 mean = mean_data[gx]; mat2x4 var = var_data[gx]; @@ -74,8 +74,8 @@ void main() else { #if NCNN_image_shader - mat2x4 gamma = mat2x4(image1d_ld8(gamma_blob, gx)); - mat2x4 beta = mat2x4(image1d_ld8(beta_blob, gx)); + mat2x4 gamma = mat2x4(image3d_ld8(gamma_blob, ivec3(gx, 0, 0))); + mat2x4 beta = mat2x4(image3d_ld8(beta_blob, ivec3(gx, 0, 0))); #else mat2x4 gamma = mat2x4(buffer_ld8(gamma_data, gx)); mat2x4 beta = mat2x4(buffer_ld8(beta_data, gx)); @@ -88,10 +88,10 @@ 
void main() } #if NCNN_image_shader - imageStore(coeffs_blob, (gx*2) * 2, a[0]); - imageStore(coeffs_blob, (gx*2) * 2 + 1, a[1]); - imageStore(coeffs_blob, (gx*2 +1) * 2, b[0]); - imageStore(coeffs_blob, (gx*2 +1) * 2 + 1, b[1]); + imageStore(coeffs_blob, ivec3((gx*2) * 2, 0, 0), a[0]); + imageStore(coeffs_blob, ivec3((gx*2) * 2 + 1, 0, 0), a[1]); + imageStore(coeffs_blob, ivec3((gx*2 +1) * 2, 0, 0), b[0]); + imageStore(coeffs_blob, ivec3((gx*2 +1) * 2 + 1, 0, 0), b[1]); #else buffer_st8(coeffs_blob_data, gx*2, afpvec8(a)); buffer_st8(coeffs_blob_data, gx*2 +1, afpvec8(b)); diff --git a/src/layer/vulkan/shader/instancenorm_norm.comp b/src/layer/vulkan/shader/instancenorm_norm.comp index cc0361550..990ece362 100644 --- a/src/layer/vulkan/shader/instancenorm_norm.comp +++ b/src/layer/vulkan/shader/instancenorm_norm.comp @@ -31,7 +31,7 @@ layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler1D coeffs_blob; +layout (binding = 2) uniform unfp sampler3D coeffs_blob; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer coeffs_blob { sfp coeffs_blob_data[]; }; @@ -58,8 +58,8 @@ void main() #if NCNN_image_shader afp v = image3d_ld1(bottom_blob, ivec3(gx, gy, gz)); - afp a = image1d_ld1(coeffs_blob, gz*2); - afp b = image1d_ld1(coeffs_blob, gz*2 +1); + afp a = image3d_ld1(coeffs_blob, ivec3(gz*2, 0, 0)); + afp b = image3d_ld1(coeffs_blob, ivec3(gz*2 +1, 0, 0)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; diff --git a/src/layer/vulkan/shader/instancenorm_norm_pack4.comp b/src/layer/vulkan/shader/instancenorm_norm_pack4.comp index 7e7e74df6..c6a659d6d 100644 --- a/src/layer/vulkan/shader/instancenorm_norm_pack4.comp +++ b/src/layer/vulkan/shader/instancenorm_norm_pack4.comp @@ -31,7 +31,7 
@@ layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler1D coeffs_blob; +layout (binding = 2) uniform unfp sampler3D coeffs_blob; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer coeffs_blob { sfpvec4 coeffs_blob_data[]; }; @@ -58,8 +58,8 @@ void main() #if NCNN_image_shader afpvec4 v = image3d_ld4(bottom_blob, ivec3(gx, gy, gz)); - afpvec4 a = image1d_ld4(coeffs_blob, gz*2); - afpvec4 b = image1d_ld4(coeffs_blob, gz*2 +1); + afpvec4 a = image3d_ld4(coeffs_blob, ivec3(gz*2, 0, 0)); + afpvec4 b = image3d_ld4(coeffs_blob, ivec3(gz*2 +1, 0, 0)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; diff --git a/src/layer/vulkan/shader/instancenorm_norm_pack8.comp b/src/layer/vulkan/shader/instancenorm_norm_pack8.comp index 28a79207e..790036404 100644 --- a/src/layer/vulkan/shader/instancenorm_norm_pack8.comp +++ b/src/layer/vulkan/shader/instancenorm_norm_pack8.comp @@ -32,7 +32,7 @@ layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler1D coeffs_blob; +layout (binding = 2) uniform unfp sampler3D coeffs_blob; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer coeffs_blob { sfpvec8 coeffs_blob_data[]; }; @@ -59,8 +59,8 @@ void main() #if NCNN_image_shader afpvec8 v = image3d_ld8(bottom_blob, ivec3(gx, gy, gz)); - afpvec8 a = image1d_ld8(coeffs_blob, gz*2); - afpvec8 b = image1d_ld8(coeffs_blob, gz*2 +1); + afpvec8 a = image3d_ld8(coeffs_blob, ivec3(gz*2, 0, 0)); + afpvec8 b = image3d_ld8(coeffs_blob, 
ivec3(gz*2 +1, 0, 0)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; diff --git a/src/layer/vulkan/shader/instancenorm_reduce_mean.comp b/src/layer/vulkan/shader/instancenorm_reduce_mean.comp index dea9fa735..d54871074 100644 --- a/src/layer/vulkan/shader/instancenorm_reduce_mean.comp +++ b/src/layer/vulkan/shader/instancenorm_reduce_mean.comp @@ -29,7 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int cstep = 0; #if NCNN_image_shader layout (binding = 0) uniform highp sampler3D bottom_top_blob; -layout (binding = 1, r32f) writeonly uniform highp image1D mean_blob; +layout (binding = 1, r32f) writeonly uniform highp image3D mean_blob; #else layout (binding = 0) readonly buffer bottom_top_blob { float bottom_top_blob_data[]; }; layout (binding = 1) writeonly buffer mean_blob { float mean_data[]; }; @@ -76,7 +76,7 @@ void main() float mean = sum / p.area; #if NCNN_image_shader - imageStore(mean_blob, gx, vec4(mean)); + imageStore(mean_blob, ivec3(gx, 0, 0), vec4(mean)); #else mean_data[gx] = mean; #endif diff --git a/src/layer/vulkan/shader/instancenorm_reduce_mean_pack4.comp b/src/layer/vulkan/shader/instancenorm_reduce_mean_pack4.comp index be8b9b29d..b159d836e 100644 --- a/src/layer/vulkan/shader/instancenorm_reduce_mean_pack4.comp +++ b/src/layer/vulkan/shader/instancenorm_reduce_mean_pack4.comp @@ -29,7 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int cstep = 0; #if NCNN_image_shader layout (binding = 0) uniform highp sampler3D bottom_top_blob; -layout (binding = 1, rgba32f) writeonly uniform highp image1D mean_blob; +layout (binding = 1, rgba32f) writeonly uniform highp image3D mean_blob; #else layout (binding = 0) readonly buffer bottom_top_blob { vec4 bottom_top_blob_data[]; }; layout (binding = 1) writeonly buffer mean_blob { vec4 mean_data[]; }; @@ -76,7 +76,7 @@ void main() vec4 mean = sum / p.area; #if NCNN_image_shader - imageStore(mean_blob, gx, mean); + imageStore(mean_blob, ivec3(gx, 0, 0), mean); 
#else mean_data[gx] = mean; #endif diff --git a/src/layer/vulkan/shader/instancenorm_reduce_mean_pack8.comp b/src/layer/vulkan/shader/instancenorm_reduce_mean_pack8.comp index 9671835fe..a58373529 100644 --- a/src/layer/vulkan/shader/instancenorm_reduce_mean_pack8.comp +++ b/src/layer/vulkan/shader/instancenorm_reduce_mean_pack8.comp @@ -30,7 +30,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int cstep = 0; #if NCNN_image_shader layout (binding = 0) uniform highp sampler3D bottom_top_blob; -layout (binding = 1, rgba32f) writeonly uniform highp image1D mean_blob; +layout (binding = 1, rgba32f) writeonly uniform highp image3D mean_blob; #else layout (binding = 0) readonly buffer bottom_top_blob { mat2x4 bottom_top_blob_data[]; }; layout (binding = 1) writeonly buffer mean_blob { mat2x4 mean_data[]; }; @@ -79,8 +79,8 @@ void main() mean[1] = sum[1] / p.area; #if NCNN_image_shader - imageStore(mean_blob, gx * 2, mean[0]); - imageStore(mean_blob, gx * 2 + 1, mean[1]); + imageStore(mean_blob, ivec3(gx * 2, 0, 0), mean[0]); + imageStore(mean_blob, ivec3(gx * 2 + 1, 0, 0), mean[1]); #else mean_data[gx] = mean; #endif diff --git a/src/layer/vulkan/shader/instancenorm_sub_mean_square.comp b/src/layer/vulkan/shader/instancenorm_sub_mean_square.comp index 76c0ee2b5..bb53ebcfe 100644 --- a/src/layer/vulkan/shader/instancenorm_sub_mean_square.comp +++ b/src/layer/vulkan/shader/instancenorm_sub_mean_square.comp @@ -36,7 +36,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_top_blob; -layout (binding = 1) uniform highp sampler1D mean_blob; +layout (binding = 1) uniform highp sampler3D mean_blob; layout (binding = 2, r32f) writeonly uniform highp image3D square_blob; #else layout (binding = 0) readonly buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; @@ -70,7 +70,7 @@ void main() #if NCNN_image_shader float v = float(image3d_ld1(bottom_top_blob, ivec3(gx, 
gy, gz))); - float mean = texelFetch(mean_blob, gz, 0).r; + float mean = texelFetch(mean_blob, ivec3(gz, 0, 0), 0).r; #else int v_offset = gz * psc(cstep) + gy * psc(w) + gx; diff --git a/src/layer/vulkan/shader/instancenorm_sub_mean_square_pack4.comp b/src/layer/vulkan/shader/instancenorm_sub_mean_square_pack4.comp index 96cbcf22c..46c218ba2 100644 --- a/src/layer/vulkan/shader/instancenorm_sub_mean_square_pack4.comp +++ b/src/layer/vulkan/shader/instancenorm_sub_mean_square_pack4.comp @@ -36,7 +36,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_top_blob; -layout (binding = 1) uniform highp sampler1D mean_blob; +layout (binding = 1) uniform highp sampler3D mean_blob; layout (binding = 2, rgba32f) writeonly uniform highp image3D square_blob; #else layout (binding = 0) readonly buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; @@ -70,7 +70,7 @@ void main() #if NCNN_image_shader vec4 v = vec4(image3d_ld4(bottom_top_blob, ivec3(gx, gy, gz))); - vec4 mean = texelFetch(mean_blob, gz, 0); + vec4 mean = texelFetch(mean_blob, ivec3(gz, 0, 0), 0); #else int v_offset = gz * psc(cstep) + gy * psc(w) + gx; diff --git a/src/layer/vulkan/shader/instancenorm_sub_mean_square_pack8.comp b/src/layer/vulkan/shader/instancenorm_sub_mean_square_pack8.comp index 5641b98ee..2f58d8781 100644 --- a/src/layer/vulkan/shader/instancenorm_sub_mean_square_pack8.comp +++ b/src/layer/vulkan/shader/instancenorm_sub_mean_square_pack8.comp @@ -37,7 +37,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_top_blob; -layout (binding = 1) uniform highp sampler1D mean_blob; +layout (binding = 1) uniform highp sampler3D mean_blob; layout (binding = 2, rgba32f) writeonly uniform highp image3D square_blob; #else layout (binding = 0) readonly buffer bottom_top_blob { sfpvec8 
bottom_top_blob_data[]; }; @@ -71,7 +71,7 @@ void main() #if NCNN_image_shader mat2x4 v = mat2x4(image3d_ld8(bottom_top_blob, ivec3(gx, gy, gz))); - mat2x4 mean = mat2x4(texelFetch(mean_blob, gz * 2, 0), texelFetch(mean_blob, gz * 2 + 1, 0)); + mat2x4 mean = mat2x4(texelFetch(mean_blob, ivec3(gz * 2, 0, 0), 0), texelFetch(mean_blob, ivec3(gz * 2 + 1, 0, 0), 0)); #else int v_offset = gz * psc(cstep) + gy * psc(w) + gx; diff --git a/src/layer/vulkan/shader/normalize_coeffs.comp b/src/layer/vulkan/shader/normalize_coeffs.comp index 84e4de149..9e74876cf 100644 --- a/src/layer/vulkan/shader/normalize_coeffs.comp +++ b/src/layer/vulkan/shader/normalize_coeffs.comp @@ -28,7 +28,7 @@ layout (constant_id = 3) const int eps_mode = 0; #if NCNN_image_shader layout (binding = 0) uniform highp sampler3D sqsum_blob; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D coeffs_blob; +layout (binding = 1, imfmtc1) writeonly uniform unfp image3D coeffs_blob; #else layout (binding = 0) readonly buffer sqsum_blob { float sqsum_blob_data[]; }; layout (binding = 1) writeonly buffer coeffs_blob { sfp coeffs_blob_data[]; }; @@ -79,7 +79,7 @@ void main() #if NCNN_image_shader int gi = gz * p.w * p.h + gy * p.w + gx; - image1d_st1(coeffs_blob, gi, a); + image3d_st1(coeffs_blob, ivec3(gi, 0, 0), a); #else int gi = gz * p.w + gx; diff --git a/src/layer/vulkan/shader/normalize_coeffs_pack4.comp b/src/layer/vulkan/shader/normalize_coeffs_pack4.comp index aa9914374..2bafa5996 100644 --- a/src/layer/vulkan/shader/normalize_coeffs_pack4.comp +++ b/src/layer/vulkan/shader/normalize_coeffs_pack4.comp @@ -28,7 +28,7 @@ layout (constant_id = 3) const int eps_mode = 0; #if NCNN_image_shader layout (binding = 0) uniform highp sampler3D sqsum_blob; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D coeffs_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D coeffs_blob; #else layout (binding = 0) readonly buffer sqsum_blob { vec4 sqsum_blob_data[]; }; layout (binding = 
1) writeonly buffer coeffs_blob { sfpvec4 coeffs_blob_data[]; }; @@ -83,7 +83,7 @@ void main() #if NCNN_image_shader int gi = gz * p.w * p.h + gy * p.w + gx; - image1d_st4(coeffs_blob, gi, a); + image3d_st4(coeffs_blob, ivec3(gi, 0, 0), a); #else int gi = gz * p.w + gx; @@ -115,7 +115,7 @@ void main() #if NCNN_image_shader int gi = gz * p.w * p.h + gy * p.w + gx; - image1d_st4(coeffs_blob, gi, afpvec4(a)); + image3d_st4(coeffs_blob, ivec3(gi, 0, 0), afpvec4(a)); #else int gi = gz * p.w + gx; diff --git a/src/layer/vulkan/shader/normalize_coeffs_pack8.comp b/src/layer/vulkan/shader/normalize_coeffs_pack8.comp index 39eb8f907..705f6084e 100644 --- a/src/layer/vulkan/shader/normalize_coeffs_pack8.comp +++ b/src/layer/vulkan/shader/normalize_coeffs_pack8.comp @@ -29,7 +29,7 @@ layout (constant_id = 3) const int eps_mode = 0; #if NCNN_image_shader layout (binding = 0) uniform highp sampler3D sqsum_blob; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D coeffs_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D coeffs_blob; #else layout (binding = 0) readonly buffer sqsum_blob { mat2x4 sqsum_blob_data[]; }; layout (binding = 1) writeonly buffer coeffs_blob { sfpvec8 coeffs_blob_data[]; }; @@ -87,7 +87,7 @@ void main() #if NCNN_image_shader int gi = gz * p.w * p.h + gy * p.w + gx; - image1d_st8(coeffs_blob, gi, a); + image3d_st8(coeffs_blob, ivec3(gi, 0, 0), a); #else int gi = gz * p.w + gx; @@ -123,7 +123,7 @@ void main() #if NCNN_image_shader int gi = gz * p.w * p.h + gy * p.w + gx; - image1d_st8(coeffs_blob, gi, a8); + image3d_st8(coeffs_blob, ivec3(gi, 0, 0), a8); #else int gi = gz * p.w + gx; diff --git a/src/layer/vulkan/shader/normalize_norm.comp b/src/layer/vulkan/shader/normalize_norm.comp index 14f3653ca..ed39d4842 100644 --- a/src/layer/vulkan/shader/normalize_norm.comp +++ b/src/layer/vulkan/shader/normalize_norm.comp @@ -37,8 +37,8 @@ layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader 
layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler1D coeffs_blob; -layout (binding = 3) uniform unfp sampler1D scale_blob; +layout (binding = 2) uniform unfp sampler3D coeffs_blob; +layout (binding = 3) uniform unfp sampler3D scale_blob; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer coeffs_blob { sfp coeffs_blob_data[]; }; @@ -76,7 +76,7 @@ void main() if (across_spatial == 1 && across_channel == 1) { #if NCNN_image_shader - a = image1d_ld1(coeffs_blob, 0); + a = image3d_ld1(coeffs_blob, ivec3(0, 0, 0)); #else a = buffer_ld1(coeffs_blob_data, 0); #endif @@ -85,7 +85,7 @@ void main() if (across_spatial == 1 && across_channel == 0) { #if NCNN_image_shader - a = image1d_ld1(coeffs_blob, gz); + a = image3d_ld1(coeffs_blob, ivec3(gz, 0, 0)); #else a = buffer_ld1(coeffs_blob_data, gz); #endif @@ -94,7 +94,7 @@ void main() if (across_spatial == 0 && across_channel == 1) { #if NCNN_image_shader - a = image1d_ld1(coeffs_blob, gy * psc(w) + gx); + a = image3d_ld1(coeffs_blob, ivec3(gy * psc(w) + gx, 0, 0)); #else a = buffer_ld1(coeffs_blob_data, gy * psc(w) + gx); #endif @@ -111,7 +111,7 @@ void main() else { #if NCNN_image_shader - v = v * image1d_ld1(scale_blob, gz); + v = v * image3d_ld1(scale_blob, ivec3(gz, 0, 0)); #else v = v * buffer_ld1(scale_blob_data, gz); #endif diff --git a/src/layer/vulkan/shader/normalize_norm_pack4.comp b/src/layer/vulkan/shader/normalize_norm_pack4.comp index 553819e06..8ba2b511d 100644 --- a/src/layer/vulkan/shader/normalize_norm_pack4.comp +++ b/src/layer/vulkan/shader/normalize_norm_pack4.comp @@ -37,8 +37,8 @@ layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 
2) uniform unfp sampler1D coeffs_blob; -layout (binding = 3) uniform unfp sampler1D scale_blob; +layout (binding = 2) uniform unfp sampler3D coeffs_blob; +layout (binding = 3) uniform unfp sampler3D scale_blob; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer coeffs_blob { sfpvec4 coeffs_blob_data[]; }; @@ -76,7 +76,7 @@ void main() if (across_spatial == 1 && across_channel == 1) { #if NCNN_image_shader - a = image1d_ld4(coeffs_blob, 0); + a = image3d_ld4(coeffs_blob, ivec3(0, 0, 0)); #else a = buffer_ld4(coeffs_blob_data, 0); #endif @@ -85,7 +85,7 @@ void main() if (across_spatial == 1 && across_channel == 0) { #if NCNN_image_shader - a = image1d_ld4(coeffs_blob, gz); + a = image3d_ld4(coeffs_blob, ivec3(gz, 0, 0)); #else a = buffer_ld4(coeffs_blob_data, gz); #endif @@ -94,7 +94,7 @@ void main() if (across_spatial == 0 && across_channel == 1) { #if NCNN_image_shader - a = image1d_ld4(coeffs_blob, gy * psc(w) + gx); + a = image3d_ld4(coeffs_blob, ivec3(gy * psc(w) + gx, 0, 0)); #else a = buffer_ld4(coeffs_blob_data, gy * psc(w) + gx); #endif @@ -120,7 +120,7 @@ void main() else { #if NCNN_image_shader - v = v * image1d_ld4(scale_blob, gz); + v = v * image3d_ld4(scale_blob, ivec3(gz, 0, 0)); #else v = v * buffer_ld4(scale_blob_data, gz); #endif diff --git a/src/layer/vulkan/shader/normalize_norm_pack8.comp b/src/layer/vulkan/shader/normalize_norm_pack8.comp index 1f4877546..9f63eea22 100644 --- a/src/layer/vulkan/shader/normalize_norm_pack8.comp +++ b/src/layer/vulkan/shader/normalize_norm_pack8.comp @@ -38,8 +38,8 @@ layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler1D coeffs_blob; -layout (binding = 3) uniform unfp sampler1D scale_blob; +layout (binding = 2) uniform unfp 
sampler3D coeffs_blob; +layout (binding = 3) uniform unfp sampler3D scale_blob; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer coeffs_blob { sfpvec8 coeffs_blob_data[]; }; @@ -77,7 +77,7 @@ void main() if (across_spatial == 1 && across_channel == 1) { #if NCNN_image_shader - a = image1d_ld8(coeffs_blob, 0); + a = image3d_ld8(coeffs_blob, ivec3(0, 0, 0)); #else a = buffer_ld8(coeffs_blob_data, 0); #endif @@ -86,7 +86,7 @@ void main() if (across_spatial == 1 && across_channel == 0) { #if NCNN_image_shader - a = image1d_ld8(coeffs_blob, gz); + a = image3d_ld8(coeffs_blob, ivec3(gz, 0, 0)); #else a = buffer_ld8(coeffs_blob_data, gz); #endif @@ -95,7 +95,7 @@ void main() if (across_spatial == 0 && across_channel == 1) { #if NCNN_image_shader - a = image1d_ld8(coeffs_blob, gy * psc(w) + gx); + a = image3d_ld8(coeffs_blob, ivec3(gy * psc(w) + gx, 0, 0)); #else a = buffer_ld8(coeffs_blob_data, gy * psc(w) + gx); #endif @@ -123,7 +123,7 @@ void main() else { #if NCNN_image_shader - afpvec8 scale = image1d_ld8(scale_blob, gz); + afpvec8 scale = image3d_ld8(scale_blob, ivec3(gz, 0, 0)); #else afpvec8 scale = buffer_ld8(scale_blob_data, gz); #endif diff --git a/src/layer/vulkan/shader/packing.comp b/src/layer/vulkan/shader/packing.comp index facddfff9..4c09934b0 100644 --- a/src/layer/vulkan/shader/packing.comp +++ b/src/layer/vulkan/shader/packing.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc1) 
writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #endif @@ -85,7 +81,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld1(bottom_blob_1d, gx); + v = image3d_ld1(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -98,7 +94,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st1(top_blob_1d, gx, v); + image3d_st1(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -113,7 +109,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -126,7 +122,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); + image3d_st1(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_fp16_to_fp32.comp index e28d6200c..ac36ede19 100644 --- a/src/layer/vulkan/shader/packing_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_fp16_to_fp32.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob_fp32 { float top_blob_fp32_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, r32f) writeonly uniform highp image1D top_blob_1d_fp32; -layout (binding = 3, r32f) writeonly uniform highp image2D top_blob_2d_fp32; layout (binding = 3, r32f) writeonly uniform highp image3D top_blob_3d_fp32; #endif @@ -85,7 +81,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld1(bottom_blob_1d, gx); + v = image3d_ld1(bottom_blob_3d, ivec3(gx, 0, 
0)); } #endif @@ -98,7 +94,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - imageStore(top_blob_1d_fp32, gx, vec4(v)); + imageStore(top_blob_3d_fp32, ivec3(gx, 0, 0), vec4(v)); } #endif } @@ -113,7 +109,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -126,7 +122,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - imageStore(top_blob_2d_fp32, ivec2(gx, gy), vec4(v)); + imageStore(top_blob_3d_fp32, ivec3(gx, gy, 0), vec4(v)); } #endif } diff --git a/src/layer/vulkan/shader/packing_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_fp32_to_fp16.comp index 03df9cb59..6502370c6 100644 --- a/src/layer/vulkan/shader/packing_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_fp32_to_fp16.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob_fp32 { float bottom_blob_fp32_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform highp sampler1D bottom_blob_1d_fp32; -layout (binding = 2) uniform highp sampler2D bottom_blob_2d_fp32; layout (binding = 2) uniform highp sampler3D bottom_blob_3d_fp32; -layout (binding = 3, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #endif @@ -85,7 +81,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afp(texelFetch(bottom_blob_1d_fp32, gx, 0).r); + v = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, 0, 0), 0).r); } #endif @@ -98,7 +94,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st1(top_blob_1d, gx, v); + image3d_st1(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -113,7 +109,7 @@ void main() 
#if NCNN_image_shader if (storage_type_from == 1) { - v = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, gy), 0).r); + v = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, gy, 0), 0).r); } #endif @@ -126,7 +122,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); + image3d_st1(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack1to4.comp b/src/layer/vulkan/shader/packing_pack1to4.comp index a09fe132a..6ba5c3123 100644 --- a/src/layer/vulkan/shader/packing_pack1to4.comp +++ b/src/layer/vulkan/shader/packing_pack1to4.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -92,10 +88,10 @@ void main() { int x4 = gx * 4; - v.r = image1d_ld1(bottom_blob_1d, x4 + 0); - v.g = image1d_ld1(bottom_blob_1d, x4 + 1); - v.b = image1d_ld1(bottom_blob_1d, x4 + 2); - v.a = image1d_ld1(bottom_blob_1d, x4 + 3); + v.r = image3d_ld1(bottom_blob_3d, ivec3(x4 + 0, 0, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(x4 + 1, 0, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(x4 + 2, 0, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(x4 + 3, 0, 0)); } #endif @@ -108,7 +104,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st4(top_blob_1d, gx, v); + image3d_st4(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -130,10 +126,10 @@ void main() { int 
y4 = gy * 4; - v.r = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 0)); - v.g = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 1)); - v.b = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 2)); - v.a = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 3)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 0, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 1, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 2, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 3, 0)); } #endif @@ -146,7 +142,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); + image3d_st4(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack1to4_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack1to4_fp16_to_fp32.comp index fa69c886d..48d2101ae 100644 --- a/src/layer/vulkan/shader/packing_pack1to4_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack1to4_fp16_to_fp32.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob_fp32 { vec4 top_blob_fp32_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, rgba32f) writeonly uniform highp image1D top_blob_1d_fp32; -layout (binding = 3, rgba32f) writeonly uniform highp image2D top_blob_2d_fp32; layout (binding = 3, rgba32f) writeonly uniform highp image3D top_blob_3d_fp32; #endif @@ -92,10 +88,10 @@ void main() { int x4 = gx * 4; - v.r = image1d_ld1(bottom_blob_1d, x4 + 0); - v.g = image1d_ld1(bottom_blob_1d, x4 + 1); - v.b = image1d_ld1(bottom_blob_1d, x4 + 2); - v.a = image1d_ld1(bottom_blob_1d, x4 + 3); + v.r = image3d_ld1(bottom_blob_3d, ivec3(x4 + 0, 0, 0)); + v.g = 
image3d_ld1(bottom_blob_3d, ivec3(x4 + 1, 0, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(x4 + 2, 0, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(x4 + 3, 0, 0)); } #endif @@ -108,7 +104,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - imageStore(top_blob_1d_fp32, gx, v); + imageStore(top_blob_3d_fp32, ivec3(gx, 0, 0), v); } #endif } @@ -130,10 +126,10 @@ void main() { int y4 = gy * 4; - v.r = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 0)); - v.g = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 1)); - v.b = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 2)); - v.a = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 3)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 0, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 1, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 2, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 3, 0)); } #endif @@ -146,7 +142,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - imageStore(top_blob_2d_fp32, ivec2(gx, gy), v); + imageStore(top_blob_3d_fp32, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack1to4_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack1to4_fp32_to_fp16.comp index 0d9ab9c53..b8a34014f 100644 --- a/src/layer/vulkan/shader/packing_pack1to4_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack1to4_fp32_to_fp16.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob_fp32 { float bottom_blob_fp32_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform highp sampler1D bottom_blob_1d_fp32; -layout (binding = 2) uniform highp sampler2D bottom_blob_2d_fp32; layout (binding = 2) uniform highp sampler3D bottom_blob_3d_fp32; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D 
top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -92,10 +88,10 @@ void main() { int x4 = gx * 4; - v.r = afp(texelFetch(bottom_blob_1d_fp32, x4 + 0, 0).r); - v.g = afp(texelFetch(bottom_blob_1d_fp32, x4 + 1, 0).r); - v.b = afp(texelFetch(bottom_blob_1d_fp32, x4 + 2, 0).r); - v.a = afp(texelFetch(bottom_blob_1d_fp32, x4 + 3, 0).r); + v.r = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 0, 0, 0), 0).r); + v.g = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 1, 0, 0), 0).r); + v.b = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 2, 0, 0), 0).r); + v.a = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 3, 0, 0), 0).r); } #endif @@ -108,7 +104,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st4(top_blob_1d, gx, v); + image3d_st4(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -130,10 +126,10 @@ void main() { int y4 = gy * 4; - v.r = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 0), 0).r); - v.g = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 1), 0).r); - v.b = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 2), 0).r); - v.a = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 3), 0).r); + v.r = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 0, 0), 0).r); + v.g = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 1, 0), 0).r); + v.b = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 2, 0), 0).r); + v.a = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 3, 0), 0).r); } #endif @@ -146,7 +142,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); + image3d_st4(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack1to8.comp b/src/layer/vulkan/shader/packing_pack1to8.comp index 241f7839c..7f021c106 100644 --- a/src/layer/vulkan/shader/packing_pack1to8.comp +++ b/src/layer/vulkan/shader/packing_pack1to8.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const 
int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -98,14 +94,14 @@ void main() { int x4 = gx * 8; - v[0].r = image1d_ld1(bottom_blob_1d, x4 + 0); - v[0].g = image1d_ld1(bottom_blob_1d, x4 + 1); - v[0].b = image1d_ld1(bottom_blob_1d, x4 + 2); - v[0].a = image1d_ld1(bottom_blob_1d, x4 + 3); - v[1].r = image1d_ld1(bottom_blob_1d, x4 + 4); - v[1].g = image1d_ld1(bottom_blob_1d, x4 + 5); - v[1].b = image1d_ld1(bottom_blob_1d, x4 + 6); - v[1].a = image1d_ld1(bottom_blob_1d, x4 + 7); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(x4 + 0, 0, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(x4 + 1, 0, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(x4 + 2, 0, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(x4 + 3, 0, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(x4 + 4, 0, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(x4 + 5, 0, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(x4 + 6, 0, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(x4 + 7, 0, 0)); } #endif @@ -118,7 +114,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st8(top_blob_1d, gx, v); + image3d_st8(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -145,14 +141,14 @@ void main() { int y4 = gy * 8; - v[0].r = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 0)); - v[0].g = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 1)); - v[0].b = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 2)); - v[0].a = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 
3)); - v[1].r = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 4)); - v[1].g = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 5)); - v[1].b = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 6)); - v[1].a = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 7)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 0, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 1, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 2, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 3, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 4, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 5, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 6, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 7, 0)); } #endif @@ -165,7 +161,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack1to8_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack1to8_fp16_to_fp32.comp index 8537dd1e9..2a059e87e 100644 --- a/src/layer/vulkan/shader/packing_pack1to8_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack1to8_fp16_to_fp32.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob_fp32 { mat2x4 top_blob_fp32_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, rgba32f) writeonly uniform highp image1D top_blob_1d_fp32; -layout (binding = 3, rgba32f) writeonly uniform highp image2D top_blob_2d_fp32; layout (binding = 3, rgba32f) writeonly uniform highp image3D top_blob_3d_fp32; #endif @@ -98,14 +94,14 @@ void main() { int x4 = gx * 8; - 
v[0].r = image1d_ld1(bottom_blob_1d, x4 + 0); - v[0].g = image1d_ld1(bottom_blob_1d, x4 + 1); - v[0].b = image1d_ld1(bottom_blob_1d, x4 + 2); - v[0].a = image1d_ld1(bottom_blob_1d, x4 + 3); - v[1].r = image1d_ld1(bottom_blob_1d, x4 + 4); - v[1].g = image1d_ld1(bottom_blob_1d, x4 + 5); - v[1].b = image1d_ld1(bottom_blob_1d, x4 + 6); - v[1].a = image1d_ld1(bottom_blob_1d, x4 + 7); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(x4 + 0, 0, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(x4 + 1, 0, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(x4 + 2, 0, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(x4 + 3, 0, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(x4 + 4, 0, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(x4 + 5, 0, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(x4 + 6, 0, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(x4 + 7, 0, 0)); } #endif @@ -118,8 +114,8 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - imageStore(top_blob_1d_fp32, gx * 2, v[0]); - imageStore(top_blob_1d_fp32, gx * 2 + 1, v[1]); + imageStore(top_blob_3d_fp32, ivec3(gx * 2, 0, 0), v[0]); + imageStore(top_blob_3d_fp32, ivec3(gx * 2 + 1, 0, 0), v[1]); } #endif } @@ -146,14 +142,14 @@ void main() { int y4 = gy * 8; - v[0].r = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 0)); - v[0].g = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 1)); - v[0].b = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 2)); - v[0].a = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 3)); - v[1].r = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 4)); - v[1].g = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 5)); - v[1].b = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 6)); - v[1].a = image2d_ld1(bottom_blob_2d, ivec2(gx, y4 + 7)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 0, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 1, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 2, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 3, 0)); + v[1].r = 
image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 4, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 5, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 6, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gx, y4 + 7, 0)); } #endif @@ -166,8 +162,8 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - imageStore(top_blob_2d_fp32, ivec2(gx * 2, gy), v[0]); - imageStore(top_blob_2d_fp32, ivec2(gx * 2 + 1, gy), v[1]); + imageStore(top_blob_3d_fp32, ivec3(gx * 2, gy, 0), v[0]); + imageStore(top_blob_3d_fp32, ivec3(gx * 2 + 1, gy, 0), v[1]); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack1to8_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack1to8_fp32_to_fp16.comp index af19e404f..b27487903 100644 --- a/src/layer/vulkan/shader/packing_pack1to8_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack1to8_fp32_to_fp16.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob_fp32 { float bottom_blob_fp32_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform highp sampler1D bottom_blob_1d_fp32; -layout (binding = 2) uniform highp sampler2D bottom_blob_2d_fp32; layout (binding = 2) uniform highp sampler3D bottom_blob_3d_fp32; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -98,14 +94,14 @@ void main() { int x4 = gx * 8; - v[0].r = afp(texelFetch(bottom_blob_1d_fp32, x4 + 0, 0).r); - v[0].g = afp(texelFetch(bottom_blob_1d_fp32, x4 + 1, 0).r); - v[0].b = afp(texelFetch(bottom_blob_1d_fp32, x4 + 2, 0).r); - v[0].a = afp(texelFetch(bottom_blob_1d_fp32, x4 + 3, 0).r); - v[1].r = afp(texelFetch(bottom_blob_1d_fp32, x4 + 4, 0).r); - v[1].g = 
afp(texelFetch(bottom_blob_1d_fp32, x4 + 5, 0).r); - v[1].b = afp(texelFetch(bottom_blob_1d_fp32, x4 + 6, 0).r); - v[1].a = afp(texelFetch(bottom_blob_1d_fp32, x4 + 7, 0).r); + v[0].r = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 0, 0, 0), 0).r); + v[0].g = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 1, 0, 0), 0).r); + v[0].b = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 2, 0, 0), 0).r); + v[0].a = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 3, 0, 0), 0).r); + v[1].r = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 4, 0, 0), 0).r); + v[1].g = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 5, 0, 0), 0).r); + v[1].b = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 6, 0, 0), 0).r); + v[1].a = afp(texelFetch(bottom_blob_3d_fp32, ivec3(x4 + 7, 0, 0), 0).r); } #endif @@ -118,7 +114,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st8(top_blob_1d, gx, v); + image3d_st8(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -145,14 +141,14 @@ void main() { int y4 = gy * 8; - v[0].r = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 0), 0).r); - v[0].g = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 1), 0).r); - v[0].b = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 2), 0).r); - v[0].a = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 3), 0).r); - v[1].r = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 4), 0).r); - v[1].g = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 5), 0).r); - v[1].b = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 6), 0).r); - v[1].a = afp(texelFetch(bottom_blob_2d_fp32, ivec2(gx, y4 + 7), 0).r); + v[0].r = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 0, 0), 0).r); + v[0].g = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 1, 0), 0).r); + v[0].b = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 2, 0), 0).r); + v[0].a = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 3, 0), 0).r); + v[1].r = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 4, 0), 0).r); + v[1].g = 
afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 5, 0), 0).r); + v[1].b = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 6, 0), 0).r); + v[1].a = afp(texelFetch(bottom_blob_3d_fp32, ivec3(gx, y4 + 7, 0), 0).r); } #endif @@ -165,7 +161,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack4.comp b/src/layer/vulkan/shader/packing_pack4.comp index 5ca1d557d..b0697d253 100644 --- a/src/layer/vulkan/shader/packing_pack4.comp +++ b/src/layer/vulkan/shader/packing_pack4.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -85,7 +81,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld4(bottom_blob_1d, gx); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -98,7 +94,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st4(top_blob_1d, gx, v); + image3d_st4(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -113,7 +109,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -126,7 +122,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st4(top_blob_2d, 
ivec2(gx, gy), v); + image3d_st4(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack4_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack4_fp16_to_fp32.comp index 7beb27ef2..2728820d9 100644 --- a/src/layer/vulkan/shader/packing_pack4_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack4_fp16_to_fp32.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob_fp32 { vec4 top_blob_fp32_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, rgba32f) writeonly uniform highp image1D top_blob_1d_fp32; -layout (binding = 3, rgba32f) writeonly uniform highp image2D top_blob_2d_fp32; layout (binding = 3, rgba32f) writeonly uniform highp image3D top_blob_3d_fp32; #endif @@ -85,7 +81,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld4(bottom_blob_1d, gx); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -98,7 +94,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - imageStore(top_blob_1d_fp32, gx, v); + imageStore(top_blob_3d_fp32, ivec3(gx, 0, 0), v); } #endif } @@ -113,7 +109,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -126,7 +122,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - imageStore(top_blob_2d_fp32, ivec2(gx, gy), v); + imageStore(top_blob_3d_fp32, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack4_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack4_fp32_to_fp16.comp index c7d432ef6..2656840ae 100644 --- 
a/src/layer/vulkan/shader/packing_pack4_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack4_fp32_to_fp16.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob_fp32 { vec4 bottom_blob_fp32_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform highp sampler1D bottom_blob_1d_fp32; -layout (binding = 2) uniform highp sampler2D bottom_blob_2d_fp32; layout (binding = 2) uniform highp sampler3D bottom_blob_3d_fp32; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -85,7 +81,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afpvec4(texelFetch(bottom_blob_1d_fp32, gx, 0)); + v = afpvec4(texelFetch(bottom_blob_3d_fp32, ivec3(gx, 0, 0), 0)); } #endif @@ -98,7 +94,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st4(top_blob_1d, gx, v); + image3d_st4(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -113,7 +109,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afpvec4(texelFetch(bottom_blob_2d_fp32, ivec2(gx, gy), 0)); + v = afpvec4(texelFetch(bottom_blob_3d_fp32, ivec3(gx, gy, 0), 0)); } #endif @@ -126,7 +122,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); + image3d_st4(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack4to1.comp b/src/layer/vulkan/shader/packing_pack4to1.comp index 9e67fa332..fc89465d0 100644 --- a/src/layer/vulkan/shader/packing_pack4to1.comp +++ b/src/layer/vulkan/shader/packing_pack4to1.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0)
readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #endif @@ -85,7 +81,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld4(bottom_blob_1d, gx); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -105,10 +101,10 @@ void main() { int x4 = gx * 4; - image1d_st1(top_blob_1d, x4 + 0, v.r); - image1d_st1(top_blob_1d, x4 + 1, v.g); - image1d_st1(top_blob_1d, x4 + 2, v.b); - image1d_st1(top_blob_1d, x4 + 3, v.a); + image3d_st1(top_blob_3d, ivec3(x4 + 0, 0, 0), v.r); + image3d_st1(top_blob_3d, ivec3(x4 + 1, 0, 0), v.g); + image3d_st1(top_blob_3d, ivec3(x4 + 2, 0, 0), v.b); + image3d_st1(top_blob_3d, ivec3(x4 + 3, 0, 0), v.a); } #endif } @@ -123,7 +119,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -143,10 +139,10 @@ void main() { int y4 = gy * 4; - image2d_st1(top_blob_2d, ivec2(gx, y4 + 0), v.r); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 1), v.g); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 2), v.b); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 3), v.a); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 0, 0), v.r); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 1, 0), v.g); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 2, 0), v.b); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 3, 0), v.a); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack4to1_fp16_to_fp32.comp 
b/src/layer/vulkan/shader/packing_pack4to1_fp16_to_fp32.comp index bd51ba019..4714b3fcc 100644 --- a/src/layer/vulkan/shader/packing_pack4to1_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack4to1_fp16_to_fp32.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob_fp32 { float top_blob_fp32_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, r32f) writeonly uniform highp image1D top_blob_1d_fp32; -layout (binding = 3, r32f) writeonly uniform highp image2D top_blob_2d_fp32; layout (binding = 3, r32f) writeonly uniform highp image3D top_blob_3d_fp32; #endif @@ -85,7 +81,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld4(bottom_blob_1d, gx); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -105,10 +101,10 @@ void main() { int x4 = gx * 4; - image1d_st1(top_blob_1d_fp32, x4 + 0, v.r); - image1d_st1(top_blob_1d_fp32, x4 + 1, v.g); - image1d_st1(top_blob_1d_fp32, x4 + 2, v.b); - image1d_st1(top_blob_1d_fp32, x4 + 3, v.a); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 0, 0, 0), v.r); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 1, 0, 0), v.g); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 2, 0, 0), v.b); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 3, 0, 0), v.a); } #endif } @@ -123,7 +119,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -143,10 +139,10 @@ void main() { int y4 = gy * 4; - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 0), v.r); - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 1), v.g); - image2d_st1(top_blob_2d_fp32, 
ivec2(gx, y4 + 2), v.b); - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 3), v.a); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 0, 0), v.r); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 1, 0), v.g); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 2, 0), v.b); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 3, 0), v.a); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack4to1_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack4to1_fp32_to_fp16.comp index d0ca5c2d1..61d87537f 100644 --- a/src/layer/vulkan/shader/packing_pack4to1_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack4to1_fp32_to_fp16.comp @@ -40,11 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob_fp32 { vec4 bottom_blob_fp32_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform highp sampler1D bottom_blob_1d_fp32; -layout (binding = 2) uniform highp sampler2D bottom_blob_2d_fp32; layout (binding = 2) uniform highp sampler3D bottom_blob_3d_fp32; -layout (binding = 3, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #endif @@ -85,7 +81,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afpvec4(texelFetch(bottom_blob_1d_fp32, gx, 0)); + v = afpvec4(texelFetch(bottom_blob_3d_fp32, ivec3(gx, 0, 0), 0)); } #endif @@ -105,10 +101,10 @@ void main() { int x4 = gx * 4; - image1d_st1(top_blob_1d, x4 + 0, v.r); - image1d_st1(top_blob_1d, x4 + 1, v.g); - image1d_st1(top_blob_1d, x4 + 2, v.b); - image1d_st1(top_blob_1d, x4 + 3, v.a); + image3d_st1(top_blob_3d, ivec3(x4 + 0, 0, 0), v.r); + image3d_st1(top_blob_3d, ivec3(x4 + 1, 0, 0), v.g); + image3d_st1(top_blob_3d, ivec3(x4 + 2, 0, 0), v.b); + image3d_st1(top_blob_3d, ivec3(x4 + 3, 0, 0), v.a); } #endif } 
@@ -123,7 +119,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afpvec4(texelFetch(bottom_blob_2d_fp32, ivec2(gx, gy), 0)); + v = afpvec4(texelFetch(bottom_blob_3d_fp32, ivec3(gx, gy, 0), 0)); } #endif @@ -143,10 +139,10 @@ void main() { int y4 = gy * 4; - image2d_st1(top_blob_2d, ivec2(gx, y4 + 0), v.r); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 1), v.g); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 2), v.b); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 3), v.a); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 0, 0), v.r); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 1, 0), v.g); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 2, 0), v.b); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 3, 0), v.a); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack4to8.comp b/src/layer/vulkan/shader/packing_pack4to8.comp index 94c7eaf3c..532a93b7f 100644 --- a/src/layer/vulkan/shader/packing_pack4to8.comp +++ b/src/layer/vulkan/shader/packing_pack4to8.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -91,8 +87,8 @@ void main() { int x2 = gx * 2; - v[0] = image1d_ld4(bottom_blob_1d, x2 + 0); - v[1] = image1d_ld4(bottom_blob_1d, x2 + 1); + v[0] = image3d_ld4(bottom_blob_3d, ivec3(x2 + 0, 0, 0)); + v[1] = image3d_ld4(bottom_blob_3d, ivec3(x2 + 1, 0, 0)); } #endif @@ -105,7 +101,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 
1) { - image1d_st8(top_blob_1d, gx, v); + image3d_st8(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -125,8 +121,8 @@ void main() { int y2 = gy * 2; - v[0] = image2d_ld4(bottom_blob_2d, ivec2(gx, y2 + 0)); - v[1] = image2d_ld4(bottom_blob_2d, ivec2(gx, y2 + 1)); + v[0] = image3d_ld4(bottom_blob_3d, ivec3(gx, y2 + 0, 0)); + v[1] = image3d_ld4(bottom_blob_3d, ivec3(gx, y2 + 1, 0)); } #endif @@ -139,7 +135,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack4to8_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack4to8_fp16_to_fp32.comp index d1961eeeb..993399f93 100644 --- a/src/layer/vulkan/shader/packing_pack4to8_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack4to8_fp16_to_fp32.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob_fp32 { mat2x4 top_blob_fp32_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, rgba32f) writeonly uniform highp image1D top_blob_1d_fp32; -layout (binding = 3, rgba32f) writeonly uniform highp image2D top_blob_2d_fp32; layout (binding = 3, rgba32f) writeonly uniform highp image3D top_blob_3d_fp32; #endif @@ -91,8 +87,8 @@ void main() { int x2 = gx * 2; - v[0] = image1d_ld4(bottom_blob_1d, x2 + 0); - v[1] = image1d_ld4(bottom_blob_1d, x2 + 1); + v[0] = image3d_ld4(bottom_blob_3d, ivec3(x2 + 0, 0, 0)); + v[1] = image3d_ld4(bottom_blob_3d, ivec3(x2 + 1, 0, 0)); } #endif @@ -105,7 +101,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st8(top_blob_1d_fp32, gx, v); + 
image3d_st8(top_blob_3d_fp32, ivec3(gx, 0, 0), v); } #endif } @@ -125,8 +121,8 @@ void main() { int y2 = gy * 2; - v[0] = image2d_ld4(bottom_blob_2d, ivec2(gx, y2 + 0)); - v[1] = image2d_ld4(bottom_blob_2d, ivec2(gx, y2 + 1)); + v[0] = image3d_ld4(bottom_blob_3d, ivec3(gx, y2 + 0, 0)); + v[1] = image3d_ld4(bottom_blob_3d, ivec3(gx, y2 + 1, 0)); } #endif @@ -139,7 +135,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st8(top_blob_2d_fp32, ivec2(gx, gy), v); + image3d_st8(top_blob_3d_fp32, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack4to8_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack4to8_fp32_to_fp16.comp index 0f3bf75eb..912827a00 100644 --- a/src/layer/vulkan/shader/packing_pack4to8_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack4to8_fp32_to_fp16.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob_fp32 { vec4 bottom_blob_fp32_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform highp sampler1D bottom_blob_1d_fp32; -layout (binding = 2) uniform highp sampler2D bottom_blob_2d_fp32; layout (binding = 2) uniform highp sampler3D bottom_blob_3d_fp32; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -91,8 +87,8 @@ void main() { int x2 = gx * 2; - v[0] = image1d_ld4(bottom_blob_1d_fp32, x2 + 0); - v[1] = image1d_ld4(bottom_blob_1d_fp32, x2 + 1); + v[0] = image3d_ld4(bottom_blob_3d_fp32, ivec3(x2 + 0, 0, 0)); + v[1] = image3d_ld4(bottom_blob_3d_fp32, ivec3(x2 + 1, 0, 0)); } #endif @@ -105,7 +101,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st8(top_blob_1d, gx, v); + image3d_st8(top_blob_3d, 
ivec3(gx, 0, 0), v); } #endif } @@ -125,8 +121,8 @@ void main() { int y2 = gy * 2; - v[0] = image2d_ld4(bottom_blob_2d_fp32, ivec2(gx, y2 + 0)); - v[1] = image2d_ld4(bottom_blob_2d_fp32, ivec2(gx, y2 + 1)); + v[0] = image3d_ld4(bottom_blob_3d_fp32, ivec3(gx, y2 + 0, 0)); + v[1] = image3d_ld4(bottom_blob_3d_fp32, ivec3(gx, y2 + 1, 0)); } #endif @@ -139,7 +135,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack8.comp b/src/layer/vulkan/shader/packing_pack8.comp index fe19930f6..95bb5dd12 100644 --- a/src/layer/vulkan/shader/packing_pack8.comp +++ b/src/layer/vulkan/shader/packing_pack8.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -86,7 +82,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld8(bottom_blob_1d, gx); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -99,7 +95,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image1d_st8(top_blob_1d, gx, v); + image3d_st8(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -114,7 +110,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); 
} #endif @@ -127,7 +123,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack8_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack8_fp16_to_fp32.comp index 8e5e6ee3a..6ebeb5586 100644 --- a/src/layer/vulkan/shader/packing_pack8_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack8_fp16_to_fp32.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob_fp32 { mat2x4 top_blob_fp32_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, rgba32f) writeonly uniform highp image1D top_blob_1d_fp32; -layout (binding = 3, rgba32f) writeonly uniform highp image2D top_blob_2d_fp32; layout (binding = 3, rgba32f) writeonly uniform highp image3D top_blob_3d_fp32; #endif @@ -86,7 +82,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld8(bottom_blob_1d, gx); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -99,8 +95,10 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - imageStore(top_blob_1d_fp32, gx * 2, v[0]); - imageStore(top_blob_1d_fp32, gx * 2 + 1, v[1]); + int x2 = gx * 2; + + imageStore(top_blob_3d_fp32, ivec3(x2 + 0, 0, 0), v[0]); + imageStore(top_blob_3d_fp32, ivec3(x2 + 1, 0, 0), v[1]); } #endif } @@ -115,7 +113,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -128,8 +126,10 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - 
imageStore(top_blob_2d_fp32, ivec2(gx * 2, gy), v[0]); - imageStore(top_blob_2d_fp32, ivec2(gx * 2 + 1, gy), v[1]); + int x2 = gx * 2; + + imageStore(top_blob_3d_fp32, ivec3(x2 + 0, gy, 0), v[0]); + imageStore(top_blob_3d_fp32, ivec3(x2 + 1, gy, 0), v[1]); } #endif } @@ -157,8 +157,10 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - imageStore(top_blob_3d_fp32, ivec3(gx * 2, gy, gz), v[0]); - imageStore(top_blob_3d_fp32, ivec3(gx * 2 + 1, gy, gz), v[1]); + int x2 = gx * 2; + + imageStore(top_blob_3d_fp32, ivec3(x2 + 0, gy, gz), v[0]); + imageStore(top_blob_3d_fp32, ivec3(x2 + 1, gy, gz), v[1]); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack8_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack8_fp32_to_fp16.comp index c5bb33011..4ceb506c6 100644 --- a/src/layer/vulkan/shader/packing_pack8_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack8_fp32_to_fp16.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob_fp32 { mat2x4 bottom_blob_fp32_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform highp sampler1D bottom_blob_1d_fp32; -layout (binding = 2) uniform highp sampler2D bottom_blob_2d_fp32; layout (binding = 2) uniform highp sampler3D bottom_blob_3d_fp32; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -86,7 +82,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afpvec8(texelFetch(bottom_blob_1d_fp32, gx * 2, 0), texelFetch(bottom_blob_1d_fp32, gx * 2 + 1, 0)); + v = afpvec8(texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2, 0, 0), 0), texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2 + 1, 0, 0), 0)); } #endif @@ -99,7 +95,7 @@ void main() #if 
NCNN_image_shader if (storage_type_to == 1) { - image1d_st8(top_blob_1d, gx, v); + image3d_st8(top_blob_3d, ivec3(gx, 0, 0), v); } #endif } @@ -114,7 +110,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afpvec8(texelFetch(bottom_blob_2d_fp32, ivec2(gx * 2, gy), 0), texelFetch(bottom_blob_2d_fp32, ivec2(gx * 2 + 1, gy), 0)); + v = afpvec8(texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2, gy, 0), 0), texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2 + 1, gy, 0), 0)); } #endif @@ -127,7 +123,7 @@ void main() #if NCNN_image_shader if (storage_type_to == 1) { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack8to1.comp b/src/layer/vulkan/shader/packing_pack8to1.comp index 63d4396aa..9b358ce7b 100644 --- a/src/layer/vulkan/shader/packing_pack8to1.comp +++ b/src/layer/vulkan/shader/packing_pack8to1.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #endif @@ -86,7 +82,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld8(bottom_blob_1d, gx); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -111,14 +107,14 @@ void main() { int x4 = gx * 8; - image1d_st1(top_blob_1d, x4 + 0, v[0].r); - image1d_st1(top_blob_1d, x4 + 1, v[0].g); - image1d_st1(top_blob_1d, x4 + 2, v[0].b); - 
image1d_st1(top_blob_1d, x4 + 3, v[0].a); - image1d_st1(top_blob_1d, x4 + 4, v[1].r); - image1d_st1(top_blob_1d, x4 + 5, v[1].g); - image1d_st1(top_blob_1d, x4 + 6, v[1].b); - image1d_st1(top_blob_1d, x4 + 7, v[1].a); + image3d_st1(top_blob_3d, ivec3(x4 + 0, 0, 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(x4 + 1, 0, 0), v[0].g); + image3d_st1(top_blob_3d, ivec3(x4 + 2, 0, 0), v[0].b); + image3d_st1(top_blob_3d, ivec3(x4 + 3, 0, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(x4 + 4, 0, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(x4 + 5, 0, 0), v[1].g); + image3d_st1(top_blob_3d, ivec3(x4 + 6, 0, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(x4 + 7, 0, 0), v[1].a); } #endif } @@ -133,7 +129,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -158,14 +154,14 @@ void main() { int y4 = gy * 8; - image2d_st1(top_blob_2d, ivec2(gx, y4 + 0), v[0].r); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 1), v[0].g); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 2), v[0].b); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 3), v[0].a); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 4), v[1].r); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 5), v[1].g); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 6), v[1].b); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 7), v[1].a); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 0, 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 1, 0), v[0].g); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 2, 0), v[0].b); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 3, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 4, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 5, 0), v[1].g); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 6, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 7, 0), v[1].a); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack8to1_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack8to1_fp16_to_fp32.comp index 
df99b2efa..3e8a00df5 100644 --- a/src/layer/vulkan/shader/packing_pack8to1_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack8to1_fp16_to_fp32.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob_fp32 { float top_blob_fp32_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, r32f) writeonly uniform highp image1D top_blob_1d_fp32; -layout (binding = 3, r32f) writeonly uniform highp image2D top_blob_2d_fp32; layout (binding = 3, r32f) writeonly uniform highp image3D top_blob_3d_fp32; #endif @@ -86,7 +82,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld8(bottom_blob_1d, gx); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -111,14 +107,14 @@ void main() { int x4 = gx * 8; - image1d_st1(top_blob_1d_fp32, x4 + 0, v[0].r); - image1d_st1(top_blob_1d_fp32, x4 + 1, v[0].g); - image1d_st1(top_blob_1d_fp32, x4 + 2, v[0].b); - image1d_st1(top_blob_1d_fp32, x4 + 3, v[0].a); - image1d_st1(top_blob_1d_fp32, x4 + 4, v[1].r); - image1d_st1(top_blob_1d_fp32, x4 + 5, v[1].g); - image1d_st1(top_blob_1d_fp32, x4 + 6, v[1].b); - image1d_st1(top_blob_1d_fp32, x4 + 7, v[1].a); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 0, 0, 0), v[0].r); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 1, 0, 0), v[0].g); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 2, 0, 0), v[0].b); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 3, 0, 0), v[0].a); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 4, 0, 0), v[1].r); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 5, 0, 0), v[1].g); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 6, 0, 0), v[1].b); + image3d_st1(top_blob_3d_fp32, ivec3(x4 + 7, 0, 0), v[1].a); } #endif } 
@@ -133,7 +129,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -158,14 +154,14 @@ void main() { int y4 = gy * 8; - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 0), v[0].r); - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 1), v[0].g); - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 2), v[0].b); - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 3), v[0].a); - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 4), v[1].r); - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 5), v[1].g); - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 6), v[1].b); - image2d_st1(top_blob_2d_fp32, ivec2(gx, y4 + 7), v[1].a); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 0, 0), v[0].r); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 1, 0), v[0].g); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 2, 0), v[0].b); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 3, 0), v[0].a); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 4, 0), v[1].r); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 5, 0), v[1].g); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 6, 0), v[1].b); + image3d_st1(top_blob_3d_fp32, ivec3(gx, y4 + 7, 0), v[1].a); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack8to1_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack8to1_fp32_to_fp16.comp index 32d84d37a..599269a6d 100644 --- a/src/layer/vulkan/shader/packing_pack8to1_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack8to1_fp32_to_fp16.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob_fp32 { mat2x4 bottom_blob_fp32_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform highp sampler1D bottom_blob_1d_fp32; -layout (binding = 2) uniform highp sampler2D bottom_blob_2d_fp32; layout (binding = 2) uniform highp 
sampler3D bottom_blob_3d_fp32; -layout (binding = 3, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #endif @@ -86,7 +82,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afpvec8(texelFetch(bottom_blob_1d_fp32, gx * 2, 0), texelFetch(bottom_blob_1d_fp32, gx * 2 + 1, 0)); + v = afpvec8(texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2, 0, 0), 0), texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2 + 1, 0, 0), 0)); } #endif @@ -111,14 +107,14 @@ void main() { int x4 = gx * 8; - image1d_st1(top_blob_1d, x4 + 0, v[0].r); - image1d_st1(top_blob_1d, x4 + 1, v[0].g); - image1d_st1(top_blob_1d, x4 + 2, v[0].b); - image1d_st1(top_blob_1d, x4 + 3, v[0].a); - image1d_st1(top_blob_1d, x4 + 4, v[1].r); - image1d_st1(top_blob_1d, x4 + 5, v[1].g); - image1d_st1(top_blob_1d, x4 + 6, v[1].b); - image1d_st1(top_blob_1d, x4 + 7, v[1].a); + image3d_st1(top_blob_3d, ivec3(x4 + 0, 0, 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(x4 + 1, 0, 0), v[0].g); + image3d_st1(top_blob_3d, ivec3(x4 + 2, 0, 0), v[0].b); + image3d_st1(top_blob_3d, ivec3(x4 + 3, 0, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(x4 + 4, 0, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(x4 + 5, 0, 0), v[1].g); + image3d_st1(top_blob_3d, ivec3(x4 + 6, 0, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(x4 + 7, 0, 0), v[1].a); } #endif } @@ -133,7 +129,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afpvec8(texelFetch(bottom_blob_2d_fp32, ivec2(gx * 2, gy), 0), texelFetch(bottom_blob_2d_fp32, ivec2(gx * 2 + 1, gy), 0)); + v = afpvec8(texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2, gy, 0), 0), texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2 + 1, gy, 0), 0)); } #endif @@ -158,14 +154,14 @@ void main() { int y4 = gy * 8; - image2d_st1(top_blob_2d, ivec2(gx, y4 + 0), v[0].r); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 1), v[0].g); - 
image2d_st1(top_blob_2d, ivec2(gx, y4 + 2), v[0].b); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 3), v[0].a); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 4), v[1].r); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 5), v[1].g); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 6), v[1].b); - image2d_st1(top_blob_2d, ivec2(gx, y4 + 7), v[1].a); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 0, 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 1, 0), v[0].g); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 2, 0), v[0].b); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 3, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 4, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 5, 0), v[1].g); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 6, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(gx, y4 + 7, 0), v[1].a); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack8to4.comp b/src/layer/vulkan/shader/packing_pack8to4.comp index 17acddef4..48828dfc3 100644 --- a/src/layer/vulkan/shader/packing_pack8to4.comp +++ b/src/layer/vulkan/shader/packing_pack8to4.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -86,7 +82,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld8(bottom_blob_1d, gx); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -104,8 +100,8 @@ void main() { int x2 = gx * 2; - image1d_st4(top_blob_1d, x2 
+ 0, v[0]); - image1d_st4(top_blob_1d, x2 + 1, v[1]); + image3d_st4(top_blob_3d, ivec3(x2 + 0, 0, 0), v[0]); + image3d_st4(top_blob_3d, ivec3(x2 + 1, 0, 0), v[1]); } #endif } @@ -120,7 +116,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -138,8 +134,8 @@ void main() { int y2 = gy * 2; - image2d_st4(top_blob_2d, ivec2(gx, y2 + 0), v[0]); - image2d_st4(top_blob_2d, ivec2(gx, y2 + 1), v[1]); + image3d_st4(top_blob_3d, ivec3(gx, y2 + 0, 0), v[0]); + image3d_st4(top_blob_3d, ivec3(gx, y2 + 1, 0), v[1]); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack8to4_fp16_to_fp32.comp b/src/layer/vulkan/shader/packing_pack8to4_fp16_to_fp32.comp index 99b7f320a..65c75049e 100644 --- a/src/layer/vulkan/shader/packing_pack8to4_fp16_to_fp32.comp +++ b/src/layer/vulkan/shader/packing_pack8to4_fp16_to_fp32.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob_fp32 { vec4 top_blob_fp32_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 2) uniform unfp sampler2D bottom_blob_2d; layout (binding = 2) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 3, rgba32f) writeonly uniform highp image1D top_blob_1d_fp32; -layout (binding = 3, rgba32f) writeonly uniform highp image2D top_blob_2d_fp32; layout (binding = 3, rgba32f) writeonly uniform highp image3D top_blob_3d_fp32; #endif @@ -86,7 +82,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image1d_ld8(bottom_blob_1d, gx); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, 0, 0)); } #endif @@ -104,8 +100,8 @@ void main() { int x2 = gx * 2; - imageStore(top_blob_1d_fp32, x2 + 0, v[0]); - imageStore(top_blob_1d_fp32, x2 + 1, v[1]); + 
imageStore(top_blob_3d_fp32, ivec3(x2 + 0, 0, 0), v[0]); + imageStore(top_blob_3d_fp32, ivec3(x2 + 1, 0, 0), v[1]); } #endif } @@ -120,7 +116,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); } #endif @@ -138,8 +134,8 @@ void main() { int y2 = gy * 2; - imageStore(top_blob_2d_fp32, ivec2(gx, y2 + 0), v[0]); - imageStore(top_blob_2d_fp32, ivec2(gx, y2 + 1), v[1]); + imageStore(top_blob_3d_fp32, ivec3(gx, y2 + 0, 0), v[0]); + imageStore(top_blob_3d_fp32, ivec3(gx, y2 + 1, 0), v[1]); } #endif } diff --git a/src/layer/vulkan/shader/packing_pack8to4_fp32_to_fp16.comp b/src/layer/vulkan/shader/packing_pack8to4_fp32_to_fp16.comp index 498274db0..346b026af 100644 --- a/src/layer/vulkan/shader/packing_pack8to4_fp32_to_fp16.comp +++ b/src/layer/vulkan/shader/packing_pack8to4_fp32_to_fp16.comp @@ -41,11 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; layout (binding = 0) readonly buffer bottom_blob_fp32 { mat2x4 bottom_blob_fp32_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; #if NCNN_image_shader -layout (binding = 2) uniform highp sampler1D bottom_blob_1d_fp32; -layout (binding = 2) uniform highp sampler2D bottom_blob_2d_fp32; layout (binding = 2) uniform highp sampler3D bottom_blob_3d_fp32; -layout (binding = 3, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 3, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 3, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #endif @@ -86,7 +82,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afpvec8(texelFetch(bottom_blob_1d_fp32, gx * 2, 0), texelFetch(bottom_blob_1d_fp32, gx * 2 + 1, 0)); + v = afpvec8(texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2, 0, 0), 0), texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2 + 1, 0, 0), 0)); } #endif @@ -104,8 +100,8 @@ void main() { 
int x2 = gx * 2; - image1d_st4(top_blob_1d, x2 + 0, v[0]); - image1d_st4(top_blob_1d, x2 + 1, v[1]); + image3d_st4(top_blob_3d, ivec3(x2 + 0, 0, 0), v[0]); + image3d_st4(top_blob_3d, ivec3(x2 + 1, 0, 0), v[1]); } #endif } @@ -120,7 +116,7 @@ void main() #if NCNN_image_shader if (storage_type_from == 1) { - v = afpvec8(texelFetch(bottom_blob_2d_fp32, ivec2(gx * 2, gy), 0), texelFetch(bottom_blob_2d_fp32, ivec2(gx * 2 + 1, gy), 0)); + v = afpvec8(texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2, gy, 0), 0), texelFetch(bottom_blob_3d_fp32, ivec3(gx * 2 + 1, gy, 0), 0)); } #endif @@ -138,8 +134,8 @@ void main() { int y2 = gy * 2; - image2d_st4(top_blob_2d, ivec2(gx, y2 + 0), v[0]); - image2d_st4(top_blob_2d, ivec2(gx, y2 + 1), v[1]); + image3d_st4(top_blob_3d, ivec3(gx, y2 + 0, 0), v[0]); + image3d_st4(top_blob_3d, ivec3(gx, y2 + 1, 0), v[1]); } #endif } diff --git a/src/layer/vulkan/shader/padding.comp b/src/layer/vulkan/shader/padding.comp index 30830cd17..2ba7c45bc 100644 --- a/src/layer/vulkan/shader/padding.comp +++ b/src/layer/vulkan/shader/padding.comp @@ -41,7 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler1D per_channel_pad_blob; +layout (binding = 2) uniform unfp sampler3D per_channel_pad_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -96,7 +96,7 @@ void main() else if (per_channel_pad == 1) { #if NCNN_image_shader - afp v = image1d_ld1(per_channel_pad_blob, gz); + afp v = image3d_ld1(per_channel_pad_blob, ivec3(gz, 0, 0)); image3d_st1(top_blob, ivec3(gx, gy, gz), v); #else buffer_cp1(top_blob_data, gi, per_channel_pad_blob_data, gz); diff --git a/src/layer/vulkan/shader/padding_pack4.comp 
b/src/layer/vulkan/shader/padding_pack4.comp index a29fc0dc1..abb7c72e1 100644 --- a/src/layer/vulkan/shader/padding_pack4.comp +++ b/src/layer/vulkan/shader/padding_pack4.comp @@ -41,7 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler1D per_channel_pad_blob; +layout (binding = 2) uniform unfp sampler3D per_channel_pad_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -95,7 +95,7 @@ void main() else if (per_channel_pad == 1) { #if NCNN_image_shader - afpvec4 v = image1d_ld4(per_channel_pad_blob, gz); + afpvec4 v = image3d_ld4(per_channel_pad_blob, ivec3(gz, 0, 0)); image3d_st4(top_blob, ivec3(gx, gy, gz), v); #else buffer_cp4(top_blob_data, gi, per_channel_pad_blob_data, gz); diff --git a/src/layer/vulkan/shader/padding_pack8.comp b/src/layer/vulkan/shader/padding_pack8.comp index 183bcfcf8..234c5494f 100644 --- a/src/layer/vulkan/shader/padding_pack8.comp +++ b/src/layer/vulkan/shader/padding_pack8.comp @@ -42,7 +42,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; -layout (binding = 2) uniform unfp sampler1D per_channel_pad_blob; +layout (binding = 2) uniform unfp sampler3D per_channel_pad_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -97,7 +97,7 @@ void main() else if (per_channel_pad == 1) { #if NCNN_image_shader - afpvec8 v = image1d_ld8(per_channel_pad_blob, gz); + afpvec8 v = image3d_ld8(per_channel_pad_blob, 
ivec3(gz, 0, 0)); image3d_st8(top_blob, ivec3(gx, gy, gz), v); #else buffer_cp8(top_blob_data, gi, per_channel_pad_blob_data, gz); diff --git a/src/layer/vulkan/shader/permute.comp b/src/layer/vulkan/shader/permute.comp index 0e4aa4769..4ad1be2ee 100644 --- a/src/layer/vulkan/shader/permute.comp +++ b/src/layer/vulkan/shader/permute.comp @@ -37,9 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -79,11 +77,11 @@ void main() if (order_type == 0) { - image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); + image3d_cp1(top_blob_3d, ivec3(gx, gy, 0), bottom_blob_3d, ivec3(gx, gy, 0)); } if (order_type == 1) { - image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gy, gx)); + image3d_cp1(top_blob_3d, ivec3(gx, gy, 0), bottom_blob_3d, ivec3(gy, gx, 0)); } } else // if (psc(dims) == 3) diff --git a/src/layer/vulkan/shader/permute_pack1to4.comp b/src/layer/vulkan/shader/permute_pack1to4.comp index 46f7365da..80cfb9250 100644 --- a/src/layer/vulkan/shader/permute_pack1to4.comp +++ b/src/layer/vulkan/shader/permute_pack1to4.comp @@ -37,9 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D 
top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -83,22 +81,22 @@ void main() { ivec4 y4 = gy * 4 + ivec4(0, 1, 2, 3); - v.r = image2d_ld1(bottom_blob_2d, ivec2(gx, y4.r)); - v.g = image2d_ld1(bottom_blob_2d, ivec2(gx, y4.g)); - v.b = image2d_ld1(bottom_blob_2d, ivec2(gx, y4.b)); - v.a = image2d_ld1(bottom_blob_2d, ivec2(gx, y4.a)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(gx, y4.r, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(gx, y4.g, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(gx, y4.b, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(gx, y4.a, 0)); } if (order_type == 1) { ivec4 x4 = gy * 4 + ivec4(0, 1, 2, 3); - v.r = image2d_ld1(bottom_blob_2d, ivec2(x4.r, gx)); - v.g = image2d_ld1(bottom_blob_2d, ivec2(x4.g, gx)); - v.b = image2d_ld1(bottom_blob_2d, ivec2(x4.b, gx)); - v.a = image2d_ld1(bottom_blob_2d, ivec2(x4.a, gx)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, gx, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, gx, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, gx, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, gx, 0)); } - image2d_st4(top_blob_2d, ivec2(gx, gy), v); + image3d_st4(top_blob_3d, ivec3(gx, gy, 0), v); } else // if (psc(dims) == 3) { diff --git a/src/layer/vulkan/shader/permute_pack1to8.comp b/src/layer/vulkan/shader/permute_pack1to8.comp index 77a20d878..1cb834548 100644 --- a/src/layer/vulkan/shader/permute_pack1to8.comp +++ b/src/layer/vulkan/shader/permute_pack1to8.comp @@ -38,9 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer 
bottom_blob { sfp bottom_blob_data[]; }; @@ -85,31 +83,31 @@ void main() ivec4 y4 = gy * 8 + ivec4(0, 1, 2, 3); ivec4 yy4 = y4 + 4; - v[0].r = image2d_ld1(bottom_blob_2d, ivec2(gx, y4.r)); - v[0].g = image2d_ld1(bottom_blob_2d, ivec2(gx, y4.g)); - v[0].b = image2d_ld1(bottom_blob_2d, ivec2(gx, y4.b)); - v[0].a = image2d_ld1(bottom_blob_2d, ivec2(gx, y4.a)); - v[1].r = image2d_ld1(bottom_blob_2d, ivec2(gx, yy4.r)); - v[1].g = image2d_ld1(bottom_blob_2d, ivec2(gx, yy4.g)); - v[1].b = image2d_ld1(bottom_blob_2d, ivec2(gx, yy4.b)); - v[1].a = image2d_ld1(bottom_blob_2d, ivec2(gx, yy4.a)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gx, y4.r, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gx, y4.g, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gx, y4.b, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gx, y4.a, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gx, yy4.r, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gx, yy4.g, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gx, yy4.b, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gx, yy4.a, 0)); } if (order_type == 1) { ivec4 x4 = gy * 8 + ivec4(0, 1, 2, 3); ivec4 xx4 = x4 + 4; - v[0].r = image2d_ld1(bottom_blob_2d, ivec2(x4.r, gx)); - v[0].g = image2d_ld1(bottom_blob_2d, ivec2(x4.g, gx)); - v[0].b = image2d_ld1(bottom_blob_2d, ivec2(x4.b, gx)); - v[0].a = image2d_ld1(bottom_blob_2d, ivec2(x4.a, gx)); - v[1].r = image2d_ld1(bottom_blob_2d, ivec2(xx4.r, gx)); - v[1].g = image2d_ld1(bottom_blob_2d, ivec2(xx4.g, gx)); - v[1].b = image2d_ld1(bottom_blob_2d, ivec2(xx4.b, gx)); - v[1].a = image2d_ld1(bottom_blob_2d, ivec2(xx4.a, gx)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, gx, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, gx, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, gx, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, gx, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(xx4.r, gx, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(xx4.g, gx, 0)); + v[1].b = 
image3d_ld1(bottom_blob_3d, ivec3(xx4.b, gx, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(xx4.a, gx, 0)); } - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } else // if (psc(dims) == 3) { diff --git a/src/layer/vulkan/shader/permute_pack4.comp b/src/layer/vulkan/shader/permute_pack4.comp index 28b165f66..686627d03 100644 --- a/src/layer/vulkan/shader/permute_pack4.comp +++ b/src/layer/vulkan/shader/permute_pack4.comp @@ -37,9 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else #if NCNN_fp16_packed @@ -95,16 +93,16 @@ void main() ivec4 y4 = i4 / psc(w); ivec4 x4 = i4 % psc(w); - afpvec4 vr = image2d_ld4(bottom_blob_2d, ivec2(x4.r, y4.r / 4)); - afpvec4 vg = image2d_ld4(bottom_blob_2d, ivec2(x4.g, y4.g / 4)); - afpvec4 vb = image2d_ld4(bottom_blob_2d, ivec2(x4.b, y4.b / 4)); - afpvec4 va = image2d_ld4(bottom_blob_2d, ivec2(x4.a, y4.a / 4)); + afpvec4 vr = image3d_ld4(bottom_blob_3d, ivec3(x4.r, y4.r / 4, 0)); + afpvec4 vg = image3d_ld4(bottom_blob_3d, ivec3(x4.g, y4.g / 4, 0)); + afpvec4 vb = image3d_ld4(bottom_blob_3d, ivec3(x4.b, y4.b / 4, 0)); + afpvec4 va = image3d_ld4(bottom_blob_3d, ivec3(x4.a, y4.a / 4, 0)); ivec4 lane4 = y4 % 4; afpvec4 v = afpvec4(vr[lane4.r], vg[lane4.g], vb[lane4.b], va[lane4.a]); - image2d_st4(top_blob_2d, ivec2(gx, gy), v); + image3d_st4(top_blob_3d, ivec3(gx, gy, 0), v); } else // if (psc(dims) == 3) { diff --git a/src/layer/vulkan/shader/permute_pack4to1.comp b/src/layer/vulkan/shader/permute_pack4to1.comp index db11fbb47..cf51494fe 100644 --- 
a/src/layer/vulkan/shader/permute_pack4to1.comp +++ b/src/layer/vulkan/shader/permute_pack4to1.comp @@ -37,9 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; @@ -77,25 +75,25 @@ void main() // 0 = w h // 1 = h w - afpvec4 v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0)); if (order_type == 0) { ivec4 y4 = gy * 4 + ivec4(0, 1, 2, 3); - image2d_st1(top_blob_2d, ivec2(gx, y4.r), v.r); - image2d_st1(top_blob_2d, ivec2(gx, y4.g), v.g); - image2d_st1(top_blob_2d, ivec2(gx, y4.b), v.b); - image2d_st1(top_blob_2d, ivec2(gx, y4.a), v.a); + image3d_st1(top_blob_3d, ivec3(gx, y4.r, 0), v.r); + image3d_st1(top_blob_3d, ivec3(gx, y4.g, 0), v.g); + image3d_st1(top_blob_3d, ivec3(gx, y4.b, 0), v.b); + image3d_st1(top_blob_3d, ivec3(gx, y4.a, 0), v.a); } if (order_type == 1) { ivec4 x4 = gy * 4 + ivec4(0, 1, 2, 3); - image2d_st1(top_blob_2d, ivec2(x4.r, gx), v.r); - image2d_st1(top_blob_2d, ivec2(x4.g, gx), v.g); - image2d_st1(top_blob_2d, ivec2(x4.b, gx), v.b); - image2d_st1(top_blob_2d, ivec2(x4.a, gx), v.a); + image3d_st1(top_blob_3d, ivec3(x4.r, gx, 0), v.r); + image3d_st1(top_blob_3d, ivec3(x4.g, gx, 0), v.g); + image3d_st1(top_blob_3d, ivec3(x4.b, gx, 0), v.b); + image3d_st1(top_blob_3d, ivec3(x4.a, gx, 0), v.a); } } else // if (psc(dims) == 3) diff --git a/src/layer/vulkan/shader/permute_pack4to8.comp b/src/layer/vulkan/shader/permute_pack4to8.comp index 3b416dca1..6efe3df58 100644 --- a/src/layer/vulkan/shader/permute_pack4to8.comp +++ 
b/src/layer/vulkan/shader/permute_pack4to8.comp @@ -38,9 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else #if NCNN_fp16_packed @@ -101,14 +99,14 @@ void main() ivec4 yy4 = ii4 / psc(w); ivec4 xx4 = ii4 % psc(w); - afpvec4 v0 = image2d_ld4(bottom_blob_2d, ivec2(x4.r, y4.r / 4)); - afpvec4 v1 = image2d_ld4(bottom_blob_2d, ivec2(x4.g, y4.g / 4)); - afpvec4 v2 = image2d_ld4(bottom_blob_2d, ivec2(x4.b, y4.b / 4)); - afpvec4 v3 = image2d_ld4(bottom_blob_2d, ivec2(x4.a, y4.a / 4)); - afpvec4 v4 = image2d_ld4(bottom_blob_2d, ivec2(xx4.r, yy4.r / 4)); - afpvec4 v5 = image2d_ld4(bottom_blob_2d, ivec2(xx4.g, yy4.g / 4)); - afpvec4 v6 = image2d_ld4(bottom_blob_2d, ivec2(xx4.b, yy4.b / 4)); - afpvec4 v7 = image2d_ld4(bottom_blob_2d, ivec2(xx4.a, yy4.a / 4)); + afpvec4 v0 = image3d_ld4(bottom_blob_3d, ivec3(x4.r, y4.r / 4, 0)); + afpvec4 v1 = image3d_ld4(bottom_blob_3d, ivec3(x4.g, y4.g / 4, 0)); + afpvec4 v2 = image3d_ld4(bottom_blob_3d, ivec3(x4.b, y4.b / 4, 0)); + afpvec4 v3 = image3d_ld4(bottom_blob_3d, ivec3(x4.a, y4.a / 4, 0)); + afpvec4 v4 = image3d_ld4(bottom_blob_3d, ivec3(xx4.r, yy4.r / 4, 0)); + afpvec4 v5 = image3d_ld4(bottom_blob_3d, ivec3(xx4.g, yy4.g / 4, 0)); + afpvec4 v6 = image3d_ld4(bottom_blob_3d, ivec3(xx4.b, yy4.b / 4, 0)); + afpvec4 v7 = image3d_ld4(bottom_blob_3d, ivec3(xx4.a, yy4.a / 4, 0)); afpvec8 v; v[0].r = v0[y4.r % 4]; @@ -120,7 +118,7 @@ void main() v[1].b = v6[yy4.b % 4]; v[1].a = v7[yy4.a % 4]; - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } else // if (psc(dims) == 3) { diff --git 
a/src/layer/vulkan/shader/permute_pack8.comp b/src/layer/vulkan/shader/permute_pack8.comp index 5ad9c329a..d258df42d 100644 --- a/src/layer/vulkan/shader/permute_pack8.comp +++ b/src/layer/vulkan/shader/permute_pack8.comp @@ -38,9 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else #if NCNN_fp16_packed @@ -101,14 +99,14 @@ void main() ivec4 yy4 = i4 / psc(w); ivec4 xx4 = i4 % psc(w); - afpvec8 v0 = image2d_ld8(bottom_blob_2d, ivec2(x4.r, y4.r / 8)); - afpvec8 v1 = image2d_ld8(bottom_blob_2d, ivec2(x4.g, y4.g / 8)); - afpvec8 v2 = image2d_ld8(bottom_blob_2d, ivec2(x4.b, y4.b / 8)); - afpvec8 v3 = image2d_ld8(bottom_blob_2d, ivec2(x4.a, y4.a / 8)); - afpvec8 v4 = image2d_ld8(bottom_blob_2d, ivec2(xx4.r, yy4.r / 8)); - afpvec8 v5 = image2d_ld8(bottom_blob_2d, ivec2(xx4.g, yy4.g / 8)); - afpvec8 v6 = image2d_ld8(bottom_blob_2d, ivec2(xx4.b, yy4.b / 8)); - afpvec8 v7 = image2d_ld8(bottom_blob_2d, ivec2(xx4.a, yy4.a / 8)); + afpvec8 v0 = image3d_ld8(bottom_blob_3d, ivec3(x4.r, y4.r / 8, 0)); + afpvec8 v1 = image3d_ld8(bottom_blob_3d, ivec3(x4.g, y4.g / 8, 0)); + afpvec8 v2 = image3d_ld8(bottom_blob_3d, ivec3(x4.b, y4.b / 8, 0)); + afpvec8 v3 = image3d_ld8(bottom_blob_3d, ivec3(x4.a, y4.a / 8, 0)); + afpvec8 v4 = image3d_ld8(bottom_blob_3d, ivec3(xx4.r, yy4.r / 8, 0)); + afpvec8 v5 = image3d_ld8(bottom_blob_3d, ivec3(xx4.g, yy4.g / 8, 0)); + afpvec8 v6 = image3d_ld8(bottom_blob_3d, ivec3(xx4.b, yy4.b / 8, 0)); + afpvec8 v7 = image3d_ld8(bottom_blob_3d, ivec3(xx4.a, yy4.a / 8, 0)); afpvec8 v; v[0].r = v0[(y4.r % 8) / 4][y4.r % 4]; @@ -120,7 +118,7 @@ void main() v[1].b = 
v6[(yy4.b % 8) / 4][yy4.b % 4]; v[1].a = v7[(yy4.a % 8) / 4][yy4.a % 4]; - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } else // if (psc(dims) == 3) { diff --git a/src/layer/vulkan/shader/permute_pack8to1.comp b/src/layer/vulkan/shader/permute_pack8to1.comp index 244f204d7..f73303a89 100644 --- a/src/layer/vulkan/shader/permute_pack8to1.comp +++ b/src/layer/vulkan/shader/permute_pack8to1.comp @@ -38,9 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; @@ -78,35 +76,35 @@ void main() // 0 = w h // 1 = h w - afpvec8 v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); if (order_type == 0) { ivec4 y4 = gy * 8 + ivec4(0, 1, 2, 3); ivec4 yy4 = y4 + 4; - image2d_st1(top_blob_2d, ivec2(gx, y4.r), v[0].r); - image2d_st1(top_blob_2d, ivec2(gx, y4.g), v[0].g); - image2d_st1(top_blob_2d, ivec2(gx, y4.b), v[0].b); - image2d_st1(top_blob_2d, ivec2(gx, y4.a), v[0].a); - image2d_st1(top_blob_2d, ivec2(gx, yy4.r), v[1].r); - image2d_st1(top_blob_2d, ivec2(gx, yy4.g), v[1].g); - image2d_st1(top_blob_2d, ivec2(gx, yy4.b), v[1].b); - image2d_st1(top_blob_2d, ivec2(gx, yy4.a), v[1].a); + image3d_st1(top_blob_3d, ivec3(gx, y4.r, 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(gx, y4.g, 0), v[0].g); + image3d_st1(top_blob_3d, ivec3(gx, y4.b, 0), v[0].b); + image3d_st1(top_blob_3d, ivec3(gx, y4.a, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(gx, yy4.r, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(gx, yy4.g, 
0), v[1].g); + image3d_st1(top_blob_3d, ivec3(gx, yy4.b, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(gx, yy4.a, 0), v[1].a); } if (order_type == 1) { ivec4 x4 = gy * 8 + ivec4(0, 1, 2, 3); ivec4 xx4 = x4 + 4; - image2d_st1(top_blob_2d, ivec2(x4.r, gx), v[0].r); - image2d_st1(top_blob_2d, ivec2(x4.g, gx), v[0].g); - image2d_st1(top_blob_2d, ivec2(x4.b, gx), v[0].b); - image2d_st1(top_blob_2d, ivec2(x4.a, gx), v[0].a); - image2d_st1(top_blob_2d, ivec2(xx4.r, gx), v[1].r); - image2d_st1(top_blob_2d, ivec2(xx4.g, gx), v[1].g); - image2d_st1(top_blob_2d, ivec2(xx4.b, gx), v[1].b); - image2d_st1(top_blob_2d, ivec2(xx4.a, gx), v[1].a); + image3d_st1(top_blob_3d, ivec3(x4.r, gx, 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(x4.g, gx, 0), v[0].g); + image3d_st1(top_blob_3d, ivec3(x4.b, gx, 0), v[0].b); + image3d_st1(top_blob_3d, ivec3(x4.a, gx, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(xx4.r, gx, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(xx4.g, gx, 0), v[1].g); + image3d_st1(top_blob_3d, ivec3(xx4.b, gx, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(xx4.a, gx, 0), v[1].a); } } else // if (psc(dims) == 3) diff --git a/src/layer/vulkan/shader/permute_pack8to4.comp b/src/layer/vulkan/shader/permute_pack8to4.comp index adb0dece1..7a8b07f49 100644 --- a/src/layer/vulkan/shader/permute_pack8to4.comp +++ b/src/layer/vulkan/shader/permute_pack8to4.comp @@ -37,9 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else #if NCNN_fp16_packed @@ -95,10 +93,10 @@ void main() ivec4 y4 = i4 / psc(w); ivec4 x4 = i4 % psc(w); - afpvec8 v0 = image2d_ld8(bottom_blob_2d, ivec2(x4.r, y4.r / 8)); 
- afpvec8 v1 = image2d_ld8(bottom_blob_2d, ivec2(x4.g, y4.g / 8)); - afpvec8 v2 = image2d_ld8(bottom_blob_2d, ivec2(x4.b, y4.b / 8)); - afpvec8 v3 = image2d_ld8(bottom_blob_2d, ivec2(x4.a, y4.a / 8)); + afpvec8 v0 = image3d_ld8(bottom_blob_3d, ivec3(x4.r, y4.r / 8, 0)); + afpvec8 v1 = image3d_ld8(bottom_blob_3d, ivec3(x4.g, y4.g / 8, 0)); + afpvec8 v2 = image3d_ld8(bottom_blob_3d, ivec3(x4.b, y4.b / 8, 0)); + afpvec8 v3 = image3d_ld8(bottom_blob_3d, ivec3(x4.a, y4.a / 8, 0)); afpvec4 v; v.r = v0[(y4.r % 8) / 4][y4.r % 4]; @@ -106,7 +104,7 @@ void main() v.b = v2[(y4.b % 8) / 4][y4.b % 4]; v.a = v3[(y4.a % 8) / 4][y4.a % 4]; - image2d_st4(top_blob_2d, ivec2(gx, gy), v); + image3d_st4(top_blob_3d, ivec3(gx, gy, 0), v); } else // if (psc(dims) == 3) { diff --git a/src/layer/vulkan/shader/pooling_global.comp b/src/layer/vulkan/shader/pooling_global.comp index 2beb2bab3..fe2eb16c1 100644 --- a/src/layer/vulkan/shader/pooling_global.comp +++ b/src/layer/vulkan/shader/pooling_global.comp @@ -40,7 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob; +layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; @@ -119,7 +119,7 @@ void main() } #if NCNN_image_shader - image1d_st1(top_blob, gx, res); + image3d_st1(top_blob, ivec3(gx, 0, 0), res); #else buffer_st1(top_blob_data, gx, res); #endif diff --git a/src/layer/vulkan/shader/pooling_global_pack4.comp b/src/layer/vulkan/shader/pooling_global_pack4.comp index 1b1bdc097..cfba138d3 100644 --- a/src/layer/vulkan/shader/pooling_global_pack4.comp +++ b/src/layer/vulkan/shader/pooling_global_pack4.comp @@ -40,7 +40,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int 
outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; @@ -119,7 +119,7 @@ void main() } #if NCNN_image_shader - image1d_st4(top_blob, gx, res); + image3d_st4(top_blob, ivec3(gx, 0, 0), res); #else buffer_st4(top_blob_data, gx, res); #endif diff --git a/src/layer/vulkan/shader/pooling_global_pack8.comp b/src/layer/vulkan/shader/pooling_global_pack8.comp index 01b42f54a..43ebed9e6 100644 --- a/src/layer/vulkan/shader/pooling_global_pack8.comp +++ b/src/layer/vulkan/shader/pooling_global_pack8.comp @@ -41,7 +41,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader layout (binding = 0) uniform unfp sampler3D bottom_blob; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; @@ -128,7 +128,7 @@ void main() } #if NCNN_image_shader - image1d_st8(top_blob, gx, res); + image3d_st8(top_blob, ivec3(gx, 0, 0), res); #else buffer_st8(top_blob_data, gx, res); #endif diff --git a/src/layer/vulkan/shader/prelu.comp b/src/layer/vulkan/shader/prelu.comp index 7b91858e6..7977e3127 100644 --- a/src/layer/vulkan/shader/prelu.comp +++ b/src/layer/vulkan/shader/prelu.comp @@ -32,13 +32,9 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp 
sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D slope_blob; +layout (binding = 2) uniform unfp sampler3D slope_blob; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer slope_blob { sfp slope_blob_data[]; }; @@ -63,19 +59,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -94,7 +78,7 @@ void main() const int abi = gxyz[psc(dims) - 1]; #if NCNN_image_shader - slope = image1d_ld1(slope_blob, abi); + slope = image3d_ld1(slope_blob, ivec3(abi, 0, 0)); #else slope = buffer_ld1(slope_blob_data, abi); #endif @@ -103,18 +87,7 @@ void main() v = v < afp(0.f) ? 
v * slope : v; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/prelu_pack4.comp b/src/layer/vulkan/shader/prelu_pack4.comp index 963f509bf..327dcdf0c 100644 --- a/src/layer/vulkan/shader/prelu_pack4.comp +++ b/src/layer/vulkan/shader/prelu_pack4.comp @@ -32,13 +32,9 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D slope_blob; +layout (binding = 2) uniform unfp sampler3D slope_blob; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer slope_blob { sfpvec4 slope_blob_data[]; }; @@ -63,19 +59,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -94,7 +78,7 @@ void main() const int abi = gxyz[psc(dims) - 1]; #if NCNN_image_shader - 
slope = image1d_ld4(slope_blob, abi); + slope = image3d_ld4(slope_blob, ivec3(abi, 0, 0)); #else slope = buffer_ld4(slope_blob_data, abi); #endif @@ -103,18 +87,7 @@ void main() v = mix(v, v * slope, lessThan(v, afpvec4(0.f))); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/prelu_pack8.comp b/src/layer/vulkan/shader/prelu_pack8.comp index 2d36e6917..3172d385b 100644 --- a/src/layer/vulkan/shader/prelu_pack8.comp +++ b/src/layer/vulkan/shader/prelu_pack8.comp @@ -33,13 +33,9 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D slope_blob; +layout (binding = 2) uniform unfp sampler3D slope_blob; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer slope_blob { sfpvec8 slope_blob_data[]; }; @@ -64,19 +60,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); 
- } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -95,7 +79,7 @@ void main() const int abi = gxyz[psc(dims) - 1]; #if NCNN_image_shader - slope = image1d_ld8(slope_blob, abi); + slope = image3d_ld8(slope_blob, ivec3(abi, 0, 0)); #else slope = buffer_ld8(slope_blob_data, abi); #endif @@ -105,18 +89,7 @@ void main() v[1] = mix(v[1], v[1] * slope[1], lessThan(v[1], afpvec4(0.f))); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/relu.comp b/src/layer/vulkan/shader/relu.comp index a2fcb3f87..902c2fda0 100644 --- a/src/layer/vulkan/shader/relu.comp +++ b/src/layer/vulkan/shader/relu.comp @@ -31,11 +31,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; @@ -60,19 +56,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - 
} + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -85,18 +69,7 @@ void main() v = v < afp(0.f) ? v * afp(slope) : v; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/relu_pack4.comp b/src/layer/vulkan/shader/relu_pack4.comp index 4f94854bd..68bab4144 100644 --- a/src/layer/vulkan/shader/relu_pack4.comp +++ b/src/layer/vulkan/shader/relu_pack4.comp @@ -31,11 +31,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; @@ -60,19 +56,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -85,18 +69,7 @@ void main() v = mix(v, v * afp(slope), lessThan(v, afpvec4(0.f))); #if NCNN_image_shader - if 
(psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/relu_pack8.comp b/src/layer/vulkan/shader/relu_pack8.comp index 30de42980..9e7fd9c13 100644 --- a/src/layer/vulkan/shader/relu_pack8.comp +++ b/src/layer/vulkan/shader/relu_pack8.comp @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; @@ -61,19 +57,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -92,18 +76,7 @@ void main() } #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, 
gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/reshape.comp b/src/layer/vulkan/shader/reshape.comp index 118848b3b..f366c4ef4 100644 --- a/src/layer/vulkan/shader/reshape.comp +++ b/src/layer/vulkan/shader/reshape.comp @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -89,11 +85,11 @@ void main() if (psc(dims) == 1) { - v = image1d_ld1(bottom_blob_1d, x); + v = image3d_ld1(bottom_blob_3d, ivec3(x, 0, 0)); } else if (psc(dims) == 2) { - v = image2d_ld1(bottom_blob_2d, ivec2(x, y)); + v = image3d_ld1(bottom_blob_3d, ivec3(x, y, 0)); } else // if (psc(dims) == 3) { @@ -102,11 +98,11 @@ void main() if (ndim == 1) { - image1d_st1(top_blob_1d, gx, v); + image3d_st1(top_blob_3d, ivec3(gx, 0, 0), v); } if (ndim == 2) { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); + image3d_st1(top_blob_3d, ivec3(gx, gy, 0), v); } if (ndim == 3) { diff --git a/src/layer/vulkan/shader/reshape_pack1to4.comp b/src/layer/vulkan/shader/reshape_pack1to4.comp index 6f9d2ad5d..b4443da97 100644 --- a/src/layer/vulkan/shader/reshape_pack1to4.comp +++ b/src/layer/vulkan/shader/reshape_pack1to4.comp @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D 
bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -89,17 +85,17 @@ void main() if (psc(dims) == 1) { - v.r = image1d_ld1(bottom_blob_1d, x4.r); - v.g = image1d_ld1(bottom_blob_1d, x4.g); - v.b = image1d_ld1(bottom_blob_1d, x4.b); - v.a = image1d_ld1(bottom_blob_1d, x4.a); + v.r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, 0, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, 0, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, 0, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, 0, 0)); } else if (psc(dims) == 2) { - v.r = image2d_ld1(bottom_blob_2d, ivec2(x4.r, y4.r)); - v.g = image2d_ld1(bottom_blob_2d, ivec2(x4.g, y4.g)); - v.b = image2d_ld1(bottom_blob_2d, ivec2(x4.b, y4.b)); - v.a = image2d_ld1(bottom_blob_2d, ivec2(x4.a, y4.a)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, y4.r, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, y4.g, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, y4.b, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, 0)); } else // if (psc(dims) == 3) { @@ -111,11 +107,11 @@ void main() if (ndim == 1) { - image1d_st4(top_blob_1d, gx, v); + image3d_st4(top_blob_3d, ivec3(gx, 0, 0), v); } if (ndim == 2) { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); + image3d_st4(top_blob_3d, ivec3(gx, gy, 0), v); } if (ndim == 3) { diff --git a/src/layer/vulkan/shader/reshape_pack1to8.comp b/src/layer/vulkan/shader/reshape_pack1to8.comp index df77dc47b..3a073dc69 100644 --- a/src/layer/vulkan/shader/reshape_pack1to8.comp +++ b/src/layer/vulkan/shader/reshape_pack1to8.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) 
const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -106,25 +102,25 @@ void main() if (psc(dims) == 1) { - v[0].r = image1d_ld1(bottom_blob_1d, x4.r); - v[0].g = image1d_ld1(bottom_blob_1d, x4.g); - v[0].b = image1d_ld1(bottom_blob_1d, x4.b); - v[0].a = image1d_ld1(bottom_blob_1d, x4.a); - v[1].r = image1d_ld1(bottom_blob_1d, xx4.r); - v[1].g = image1d_ld1(bottom_blob_1d, xx4.g); - v[1].b = image1d_ld1(bottom_blob_1d, xx4.b); - v[1].a = image1d_ld1(bottom_blob_1d, xx4.a); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, 0, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, 0, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, 0, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, 0, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(xx4.r, 0, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(xx4.g, 0, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(xx4.b, 0, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(xx4.a, 0, 0)); } else if (psc(dims) == 2) { - v[0].r = image2d_ld1(bottom_blob_2d, ivec2(x4.r, y4.r)); - v[0].g = image2d_ld1(bottom_blob_2d, ivec2(x4.g, y4.g)); - v[0].b = image2d_ld1(bottom_blob_2d, ivec2(x4.b, y4.b)); - v[0].a = image2d_ld1(bottom_blob_2d, ivec2(x4.a, y4.a)); - v[1].r = image2d_ld1(bottom_blob_2d, ivec2(xx4.r, yy4.r)); - v[1].g = image2d_ld1(bottom_blob_2d, ivec2(xx4.g, yy4.g)); - v[1].b = image2d_ld1(bottom_blob_2d, ivec2(xx4.b, yy4.b)); - v[1].a = image2d_ld1(bottom_blob_2d, 
ivec2(xx4.a, yy4.a)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, y4.r, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, y4.g, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, y4.b, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(xx4.r, yy4.r, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(xx4.g, yy4.g, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(xx4.b, yy4.b, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(xx4.a, yy4.a, 0)); } else // if (psc(dims) == 3) { @@ -140,11 +136,11 @@ void main() if (ndim == 1) { - image1d_st8(top_blob_1d, gx, v); + image3d_st8(top_blob_3d, ivec3(gx, 0, 0), v); } if (ndim == 2) { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } if (ndim == 3) { diff --git a/src/layer/vulkan/shader/reshape_pack4.comp b/src/layer/vulkan/shader/reshape_pack4.comp index 475b5a3d2..868a8bcea 100644 --- a/src/layer/vulkan/shader/reshape_pack4.comp +++ b/src/layer/vulkan/shader/reshape_pack4.comp @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else #if NCNN_fp16_packed @@ -94,10 +90,10 @@ void main() { ivec4 x4 = i4; - vr = image1d_ld4(bottom_blob_1d, x4.r / 4); - vg = image1d_ld4(bottom_blob_1d, x4.g / 4); - vb = image1d_ld4(bottom_blob_1d, x4.b / 4); - va = image1d_ld4(bottom_blob_1d, x4.a / 4); + vr = image3d_ld4(bottom_blob_3d, ivec3(x4.r / 4, 0, 0)); + vg = 
image3d_ld4(bottom_blob_3d, ivec3(x4.g / 4, 0, 0)); + vb = image3d_ld4(bottom_blob_3d, ivec3(x4.b / 4, 0, 0)); + va = image3d_ld4(bottom_blob_3d, ivec3(x4.a / 4, 0, 0)); lane4 = x4 % 4; } @@ -106,10 +102,10 @@ void main() ivec4 y4 = i4 / psc(w); ivec4 x4 = i4 % psc(w); - vr = image2d_ld4(bottom_blob_2d, ivec2(x4.r, y4.r / 4)); - vg = image2d_ld4(bottom_blob_2d, ivec2(x4.g, y4.g / 4)); - vb = image2d_ld4(bottom_blob_2d, ivec2(x4.b, y4.b / 4)); - va = image2d_ld4(bottom_blob_2d, ivec2(x4.a, y4.a / 4)); + vr = image3d_ld4(bottom_blob_3d, ivec3(x4.r, y4.r / 4, 0)); + vg = image3d_ld4(bottom_blob_3d, ivec3(x4.g, y4.g / 4, 0)); + vb = image3d_ld4(bottom_blob_3d, ivec3(x4.b, y4.b / 4, 0)); + va = image3d_ld4(bottom_blob_3d, ivec3(x4.a, y4.a / 4, 0)); lane4 = y4 % 4; } @@ -133,11 +129,11 @@ void main() if (ndim == 1) { - image1d_st4(top_blob_1d, gx, v); + image3d_st4(top_blob_3d, ivec3(gx, 0, 0), v); } if (ndim == 2) { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); + image3d_st4(top_blob_3d, ivec3(gx, gy, 0), v); } if (ndim == 3) { diff --git a/src/layer/vulkan/shader/reshape_pack4to1.comp b/src/layer/vulkan/shader/reshape_pack4to1.comp index 0d33ac3db..908a21f7c 100644 --- a/src/layer/vulkan/shader/reshape_pack4to1.comp +++ b/src/layer/vulkan/shader/reshape_pack4to1.comp @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; @@ -87,11 +83,11 @@ void main() 
if (psc(dims) == 1) { - v = image1d_ld4(bottom_blob_1d, gx); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, 0, 0)); } else if (psc(dims) == 2) { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0)); } else // if (psc(dims) == 3) { @@ -102,20 +98,20 @@ void main() { ivec4 x4 = i4; - image1d_st1(top_blob_1d, x4.r, v.r); - image1d_st1(top_blob_1d, x4.g, v.g); - image1d_st1(top_blob_1d, x4.b, v.b); - image1d_st1(top_blob_1d, x4.a, v.a); + image3d_st1(top_blob_3d, ivec3(x4.r, 0, 0), v.r); + image3d_st1(top_blob_3d, ivec3(x4.g, 0, 0), v.g); + image3d_st1(top_blob_3d, ivec3(x4.b, 0, 0), v.b); + image3d_st1(top_blob_3d, ivec3(x4.a, 0, 0), v.a); } if (ndim == 2) { ivec4 y4 = i4 / psc(outw); ivec4 x4 = i4 % psc(outw); - image2d_st1(top_blob_2d, ivec2(x4.r, y4.r), v.r); - image2d_st1(top_blob_2d, ivec2(x4.g, y4.g), v.g); - image2d_st1(top_blob_2d, ivec2(x4.b, y4.b), v.b); - image2d_st1(top_blob_2d, ivec2(x4.a, y4.a), v.a); + image3d_st1(top_blob_3d, ivec3(x4.r, y4.r, 0), v.r); + image3d_st1(top_blob_3d, ivec3(x4.g, y4.g, 0), v.g); + image3d_st1(top_blob_3d, ivec3(x4.b, y4.b, 0), v.b); + image3d_st1(top_blob_3d, ivec3(x4.a, y4.a, 0), v.a); } if (ndim == 3) { diff --git a/src/layer/vulkan/shader/reshape_pack4to8.comp b/src/layer/vulkan/shader/reshape_pack4to8.comp index 383bedf35..f6c97f4c2 100644 --- a/src/layer/vulkan/shader/reshape_pack4to8.comp +++ b/src/layer/vulkan/shader/reshape_pack4to8.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, 
imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else #if NCNN_fp16_packed @@ -104,14 +100,14 @@ void main() ivec4 x4 = i4; ivec4 xx4 = ii4; - afpvec4 v0 = image1d_ld4(bottom_blob_1d, x4.r / 4); - afpvec4 v1 = image1d_ld4(bottom_blob_1d, x4.g / 4); - afpvec4 v2 = image1d_ld4(bottom_blob_1d, x4.b / 4); - afpvec4 v3 = image1d_ld4(bottom_blob_1d, x4.a / 4); - afpvec4 v4 = image1d_ld4(bottom_blob_1d, xx4.r / 4); - afpvec4 v5 = image1d_ld4(bottom_blob_1d, xx4.g / 4); - afpvec4 v6 = image1d_ld4(bottom_blob_1d, xx4.b / 4); - afpvec4 v7 = image1d_ld4(bottom_blob_1d, xx4.a / 4); + afpvec4 v0 = image3d_ld4(bottom_blob_3d, ivec3(x4.r / 4, 0, 0)); + afpvec4 v1 = image3d_ld4(bottom_blob_3d, ivec3(x4.g / 4, 0, 0)); + afpvec4 v2 = image3d_ld4(bottom_blob_3d, ivec3(x4.b / 4, 0, 0)); + afpvec4 v3 = image3d_ld4(bottom_blob_3d, ivec3(x4.a / 4, 0, 0)); + afpvec4 v4 = image3d_ld4(bottom_blob_3d, ivec3(xx4.r / 4, 0, 0)); + afpvec4 v5 = image3d_ld4(bottom_blob_3d, ivec3(xx4.g / 4, 0, 0)); + afpvec4 v6 = image3d_ld4(bottom_blob_3d, ivec3(xx4.b / 4, 0, 0)); + afpvec4 v7 = image3d_ld4(bottom_blob_3d, ivec3(xx4.a / 4, 0, 0)); v[0].r = v0[x4.r % 4]; v[0].g = v1[x4.g % 4]; @@ -129,14 +125,14 @@ void main() ivec4 yy4 = ii4 / psc(w); ivec4 xx4 = ii4 % psc(w); - afpvec4 v0 = image2d_ld4(bottom_blob_2d, ivec2(x4.r, y4.r / 4)); - afpvec4 v1 = image2d_ld4(bottom_blob_2d, ivec2(x4.g, y4.g / 4)); - afpvec4 v2 = image2d_ld4(bottom_blob_2d, ivec2(x4.b, y4.b / 4)); - afpvec4 v3 = image2d_ld4(bottom_blob_2d, ivec2(x4.a, y4.a / 4)); - afpvec4 v4 = image2d_ld4(bottom_blob_2d, ivec2(xx4.r, yy4.r / 4)); - afpvec4 v5 = image2d_ld4(bottom_blob_2d, ivec2(xx4.g, yy4.g / 4)); - afpvec4 v6 = image2d_ld4(bottom_blob_2d, ivec2(xx4.b, yy4.b / 4)); - afpvec4 v7 = image2d_ld4(bottom_blob_2d, ivec2(xx4.a, yy4.a / 4)); + afpvec4 v0 = image3d_ld4(bottom_blob_3d, ivec3(x4.r, y4.r / 4, 0)); + afpvec4 v1 = image3d_ld4(bottom_blob_3d, ivec3(x4.g, y4.g / 4, 0)); + afpvec4 v2 = image3d_ld4(bottom_blob_3d, ivec3(x4.b, y4.b / 
4, 0)); + afpvec4 v3 = image3d_ld4(bottom_blob_3d, ivec3(x4.a, y4.a / 4, 0)); + afpvec4 v4 = image3d_ld4(bottom_blob_3d, ivec3(xx4.r, yy4.r / 4, 0)); + afpvec4 v5 = image3d_ld4(bottom_blob_3d, ivec3(xx4.g, yy4.g / 4, 0)); + afpvec4 v6 = image3d_ld4(bottom_blob_3d, ivec3(xx4.b, yy4.b / 4, 0)); + afpvec4 v7 = image3d_ld4(bottom_blob_3d, ivec3(xx4.a, yy4.a / 4, 0)); v[0].r = v0[y4.r % 4]; v[0].g = v1[y4.g % 4]; @@ -179,11 +175,11 @@ void main() if (ndim == 1) { - image1d_st8(top_blob_1d, gx, v); + image3d_st8(top_blob_3d, ivec3(gx, 0, 0), v); } if (ndim == 2) { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } if (ndim == 3) { diff --git a/src/layer/vulkan/shader/reshape_pack8.comp b/src/layer/vulkan/shader/reshape_pack8.comp index 9aa1f473c..dbe564dd2 100644 --- a/src/layer/vulkan/shader/reshape_pack8.comp +++ b/src/layer/vulkan/shader/reshape_pack8.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else #if NCNN_fp16_packed @@ -104,14 +100,14 @@ void main() ivec4 x4 = i4; ivec4 xx4 = ii4; - afpvec8 v0 = image1d_ld8(bottom_blob_1d, x4.r / 8); - afpvec8 v1 = image1d_ld8(bottom_blob_1d, x4.g / 8); - afpvec8 v2 = image1d_ld8(bottom_blob_1d, x4.b / 8); - afpvec8 v3 = image1d_ld8(bottom_blob_1d, x4.a / 8); - afpvec8 v4 = image1d_ld8(bottom_blob_1d, xx4.r / 8); - afpvec8 v5 = image1d_ld8(bottom_blob_1d, xx4.g / 8); - afpvec8 v6 = image1d_ld8(bottom_blob_1d, xx4.b / 8); - afpvec8 v7 = 
image1d_ld8(bottom_blob_1d, xx4.a / 8); + afpvec8 v0 = image3d_ld8(bottom_blob_3d, ivec3(x4.r / 8, 0, 0)); + afpvec8 v1 = image3d_ld8(bottom_blob_3d, ivec3(x4.g / 8, 0, 0)); + afpvec8 v2 = image3d_ld8(bottom_blob_3d, ivec3(x4.b / 8, 0, 0)); + afpvec8 v3 = image3d_ld8(bottom_blob_3d, ivec3(x4.a / 8, 0, 0)); + afpvec8 v4 = image3d_ld8(bottom_blob_3d, ivec3(xx4.r / 8, 0, 0)); + afpvec8 v5 = image3d_ld8(bottom_blob_3d, ivec3(xx4.g / 8, 0, 0)); + afpvec8 v6 = image3d_ld8(bottom_blob_3d, ivec3(xx4.b / 8, 0, 0)); + afpvec8 v7 = image3d_ld8(bottom_blob_3d, ivec3(xx4.a / 8, 0, 0)); v[0].r = v0[(x4.r % 8) / 4][x4.r % 4]; v[0].g = v1[(x4.g % 8) / 4][x4.g % 4]; @@ -129,14 +125,14 @@ void main() ivec4 yy4 = ii4 / psc(w); ivec4 xx4 = ii4 % psc(w); - afpvec8 v0 = image2d_ld8(bottom_blob_2d, ivec2(x4.r, y4.r / 8)); - afpvec8 v1 = image2d_ld8(bottom_blob_2d, ivec2(x4.g, y4.g / 8)); - afpvec8 v2 = image2d_ld8(bottom_blob_2d, ivec2(x4.b, y4.b / 8)); - afpvec8 v3 = image2d_ld8(bottom_blob_2d, ivec2(x4.a, y4.a / 8)); - afpvec8 v4 = image2d_ld8(bottom_blob_2d, ivec2(xx4.r, yy4.r / 8)); - afpvec8 v5 = image2d_ld8(bottom_blob_2d, ivec2(xx4.g, yy4.g / 8)); - afpvec8 v6 = image2d_ld8(bottom_blob_2d, ivec2(xx4.b, yy4.b / 8)); - afpvec8 v7 = image2d_ld8(bottom_blob_2d, ivec2(xx4.a, yy4.a / 8)); + afpvec8 v0 = image3d_ld8(bottom_blob_3d, ivec3(x4.r, y4.r / 8, 0)); + afpvec8 v1 = image3d_ld8(bottom_blob_3d, ivec3(x4.g, y4.g / 8, 0)); + afpvec8 v2 = image3d_ld8(bottom_blob_3d, ivec3(x4.b, y4.b / 8, 0)); + afpvec8 v3 = image3d_ld8(bottom_blob_3d, ivec3(x4.a, y4.a / 8, 0)); + afpvec8 v4 = image3d_ld8(bottom_blob_3d, ivec3(xx4.r, yy4.r / 8, 0)); + afpvec8 v5 = image3d_ld8(bottom_blob_3d, ivec3(xx4.g, yy4.g / 8, 0)); + afpvec8 v6 = image3d_ld8(bottom_blob_3d, ivec3(xx4.b, yy4.b / 8, 0)); + afpvec8 v7 = image3d_ld8(bottom_blob_3d, ivec3(xx4.a, yy4.a / 8, 0)); v[0].r = v0[(y4.r % 8) / 4][y4.r % 4]; v[0].g = v1[(y4.g % 8) / 4][y4.g % 4]; @@ -179,11 +175,11 @@ void main() if (ndim == 1) { - 
image1d_st8(top_blob_1d, gx, v); + image3d_st8(top_blob_3d, ivec3(gx, 0, 0), v); } if (ndim == 2) { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); + image3d_st8(top_blob_3d, ivec3(gx, gy, 0), v); } if (ndim == 3) { diff --git a/src/layer/vulkan/shader/reshape_pack8to1.comp b/src/layer/vulkan/shader/reshape_pack8to1.comp index 0c2a46b82..4e2c913a6 100644 --- a/src/layer/vulkan/shader/reshape_pack8to1.comp +++ b/src/layer/vulkan/shader/reshape_pack8to1.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; @@ -89,11 +85,11 @@ void main() if (psc(dims) == 1) { - v = image1d_ld8(bottom_blob_1d, gx); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, 0, 0)); } else if (psc(dims) == 2) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); } else // if (psc(dims) == 3) { @@ -105,14 +101,14 @@ void main() ivec4 x4 = i4; ivec4 xx4 = ii4; - image1d_st1(top_blob_1d, x4.r, v[0].r); - image1d_st1(top_blob_1d, x4.g, v[0].g); - image1d_st1(top_blob_1d, x4.b, v[0].b); - image1d_st1(top_blob_1d, x4.a, v[0].a); - image1d_st1(top_blob_1d, xx4.r, v[1].r); - image1d_st1(top_blob_1d, xx4.g, v[1].g); - image1d_st1(top_blob_1d, xx4.b, v[1].b); - image1d_st1(top_blob_1d, xx4.a, v[1].a); + image3d_st1(top_blob_3d, ivec3(x4.r, 0, 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(x4.g, 0, 0), v[0].g); + image3d_st1(top_blob_3d, 
ivec3(x4.b, 0, 0), v[0].b); + image3d_st1(top_blob_3d, ivec3(x4.a, 0, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(xx4.r, 0, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(xx4.g, 0, 0), v[1].g); + image3d_st1(top_blob_3d, ivec3(xx4.b, 0, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(xx4.a, 0, 0), v[1].a); } if (ndim == 2) { @@ -121,14 +117,14 @@ void main() ivec4 yy4 = ii4 / psc(outw); ivec4 xx4 = ii4 % psc(outw); - image2d_st1(top_blob_2d, ivec2(x4.r, y4.r), v[0].r); - image2d_st1(top_blob_2d, ivec2(x4.g, y4.g), v[0].g); - image2d_st1(top_blob_2d, ivec2(x4.b, y4.b), v[0].b); - image2d_st1(top_blob_2d, ivec2(x4.a, y4.a), v[0].a); - image2d_st1(top_blob_2d, ivec2(xx4.r, yy4.r), v[1].r); - image2d_st1(top_blob_2d, ivec2(xx4.g, yy4.g), v[1].g); - image2d_st1(top_blob_2d, ivec2(xx4.b, yy4.b), v[1].b); - image2d_st1(top_blob_2d, ivec2(xx4.a, yy4.a), v[1].a); + image3d_st1(top_blob_3d, ivec3(x4.r, y4.r, 0), v[0].r); + image3d_st1(top_blob_3d, ivec3(x4.g, y4.g, 0), v[0].g); + image3d_st1(top_blob_3d, ivec3(x4.b, y4.b, 0), v[0].b); + image3d_st1(top_blob_3d, ivec3(x4.a, y4.a, 0), v[0].a); + image3d_st1(top_blob_3d, ivec3(xx4.r, yy4.r, 0), v[1].r); + image3d_st1(top_blob_3d, ivec3(xx4.g, yy4.g, 0), v[1].g); + image3d_st1(top_blob_3d, ivec3(xx4.b, yy4.b, 0), v[1].b); + image3d_st1(top_blob_3d, ivec3(xx4.a, yy4.a, 0), v[1].a); } if (ndim == 3) { diff --git a/src/layer/vulkan/shader/reshape_pack8to4.comp b/src/layer/vulkan/shader/reshape_pack8to4.comp index 6460eaef5..c9d5eecae 100644 --- a/src/layer/vulkan/shader/reshape_pack8to4.comp +++ b/src/layer/vulkan/shader/reshape_pack8to4.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, 
imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else #if NCNN_fp16_packed @@ -90,10 +86,10 @@ void main() { ivec4 x4 = i4; - afpvec8 v0 = image1d_ld8(bottom_blob_1d, x4.r / 8); - afpvec8 v1 = image1d_ld8(bottom_blob_1d, x4.g / 8); - afpvec8 v2 = image1d_ld8(bottom_blob_1d, x4.b / 8); - afpvec8 v3 = image1d_ld8(bottom_blob_1d, x4.a / 8); + afpvec8 v0 = image3d_ld8(bottom_blob_3d, ivec3(x4.r / 8, 0, 0)); + afpvec8 v1 = image3d_ld8(bottom_blob_3d, ivec3(x4.g / 8, 0, 0)); + afpvec8 v2 = image3d_ld8(bottom_blob_3d, ivec3(x4.b / 8, 0, 0)); + afpvec8 v3 = image3d_ld8(bottom_blob_3d, ivec3(x4.a / 8, 0, 0)); v.r = v0[(x4.r % 8) / 4][x4.r % 4]; v.g = v1[(x4.g % 8) / 4][x4.g % 4]; @@ -105,10 +101,10 @@ void main() ivec4 y4 = i4 / psc(w); ivec4 x4 = i4 % psc(w); - afpvec8 v0 = image2d_ld8(bottom_blob_2d, ivec2(x4.r, y4.r / 8)); - afpvec8 v1 = image2d_ld8(bottom_blob_2d, ivec2(x4.g, y4.g / 8)); - afpvec8 v2 = image2d_ld8(bottom_blob_2d, ivec2(x4.b, y4.b / 8)); - afpvec8 v3 = image2d_ld8(bottom_blob_2d, ivec2(x4.a, y4.a / 8)); + afpvec8 v0 = image3d_ld8(bottom_blob_3d, ivec3(x4.r, y4.r / 8, 0)); + afpvec8 v1 = image3d_ld8(bottom_blob_3d, ivec3(x4.g, y4.g / 8, 0)); + afpvec8 v2 = image3d_ld8(bottom_blob_3d, ivec3(x4.b, y4.b / 8, 0)); + afpvec8 v3 = image3d_ld8(bottom_blob_3d, ivec3(x4.a, y4.a / 8, 0)); v.r = v0[(y4.r % 8) / 4][y4.r % 4]; v.g = v1[(y4.g % 8) / 4][y4.g % 4]; @@ -136,11 +132,11 @@ void main() if (ndim == 1) { - image1d_st4(top_blob_1d, gx, v); + image3d_st4(top_blob_3d, ivec3(gx, 0, 0), v); } if (ndim == 2) { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); + image3d_st4(top_blob_3d, ivec3(gx, gy, 0), v); } if (ndim == 3) { diff --git a/src/layer/vulkan/shader/scale.comp b/src/layer/vulkan/shader/scale.comp index 0b467533d..535224112 100644 --- a/src/layer/vulkan/shader/scale.comp +++ 
b/src/layer/vulkan/shader/scale.comp @@ -31,14 +31,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D scale_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D scale_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer scale_blob { sfp scale_blob_data[]; }; @@ -64,19 +60,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -88,9 +72,9 @@ void main() #if NCNN_image_shader if (bias_term == 1) - v = image1d_ld1(scale_blob, abi) * v + image1d_ld1(bias_blob, abi); + v = image3d_ld1(scale_blob, ivec3(abi, 0, 0)) * v + image3d_ld1(bias_blob, ivec3(abi, 0, 0)); else - v = image1d_ld1(scale_blob, abi) * v; + v = image3d_ld1(scale_blob, ivec3(abi, 0, 0)) * v; #else if (bias_term == 1) v = buffer_ld1(scale_blob_data, abi) * v + buffer_ld1(bias_blob_data, abi); @@ -99,18 +83,7 @@ void main() #endif #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, 
v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/scale_pack4.comp b/src/layer/vulkan/shader/scale_pack4.comp index df07c377d..a7447c923 100644 --- a/src/layer/vulkan/shader/scale_pack4.comp +++ b/src/layer/vulkan/shader/scale_pack4.comp @@ -31,14 +31,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D scale_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D scale_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer scale_blob { sfpvec4 scale_blob_data[]; }; @@ -64,19 +60,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -88,9 +72,9 @@ void main() #if NCNN_image_shader if (bias_term == 1) 
- v = image1d_ld4(scale_blob, abi) * v + image1d_ld4(bias_blob, abi); + v = image3d_ld4(scale_blob, ivec3(abi, 0, 0)) * v + image3d_ld4(bias_blob, ivec3(abi, 0, 0)); else - v = image1d_ld4(scale_blob, abi) * v; + v = image3d_ld4(scale_blob, ivec3(abi, 0, 0)) * v; #else if (bias_term == 1) v = buffer_ld4(scale_blob_data, abi) * v + buffer_ld4(bias_blob_data, abi); @@ -99,18 +83,7 @@ void main() #endif #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/scale_pack8.comp b/src/layer/vulkan/shader/scale_pack8.comp index cf2093cbf..43232f620 100644 --- a/src/layer/vulkan/shader/scale_pack8.comp +++ b/src/layer/vulkan/shader/scale_pack8.comp @@ -32,14 +32,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D scale_blob; -layout (binding = 3) uniform unfp sampler1D bias_blob; +layout (binding = 2) uniform unfp sampler3D scale_blob; +layout (binding = 3) uniform unfp sampler3D bias_blob; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer scale_blob { sfpvec8 scale_blob_data[]; }; @@ -65,19 +61,7 @@ void 
main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -88,7 +72,7 @@ void main() const int abi = gxyz[psc(dims) - 1]; #if NCNN_image_shader - afpvec8 scale = image1d_ld8(scale_blob, abi); + afpvec8 scale = image3d_ld8(scale_blob, ivec3(abi, 0, 0)); #else afpvec8 scale = buffer_ld8(scale_blob_data, abi); #endif @@ -96,7 +80,7 @@ void main() if (bias_term == 1) { #if NCNN_image_shader - afpvec8 bias = image1d_ld8(bias_blob, abi); + afpvec8 bias = image3d_ld8(bias_blob, ivec3(abi, 0, 0)); #else afpvec8 bias = buffer_ld8(bias_blob_data, abi); #endif @@ -110,18 +94,7 @@ void main() } #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/sigmoid.comp b/src/layer/vulkan/shader/sigmoid.comp index 8f9ce761f..bda303fd6 100644 --- a/src/layer/vulkan/shader/sigmoid.comp +++ b/src/layer/vulkan/shader/sigmoid.comp @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly 
uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; @@ -58,19 +54,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -80,18 +64,7 @@ void main() v = afp(1.f) / (afp(1.f) + exp(-v)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/sigmoid_pack4.comp b/src/layer/vulkan/shader/sigmoid_pack4.comp index ff4e1c745..129c18e54 100644 --- a/src/layer/vulkan/shader/sigmoid_pack4.comp +++ b/src/layer/vulkan/shader/sigmoid_pack4.comp @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; @@ -58,19 +54,7 @@ void main() return; 
#if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -80,18 +64,7 @@ void main() v = afp(1.f) / (afp(1.f) + exp(-v)); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/sigmoid_pack8.comp b/src/layer/vulkan/shader/sigmoid_pack8.comp index 5a0d7968f..828206c5d 100644 --- a/src/layer/vulkan/shader/sigmoid_pack8.comp +++ b/src/layer/vulkan/shader/sigmoid_pack8.comp @@ -30,11 +30,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; @@ -59,19 +55,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { 
- v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -82,18 +66,7 @@ void main() v[1] = afp(1.f) / (afp(1.f) + exp(-v[1])); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/slice.comp b/src/layer/vulkan/shader/slice.comp index b92902745..96668ba18 100644 --- a/src/layer/vulkan/shader/slice.comp +++ b/src/layer/vulkan/shader/slice.comp @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -74,29 +70,15 @@ void main() if (gx >= psc(outw) || gy >= psc(outh) || gz >= psc(outc)) return; -#if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp1(top_blob_1d, gx, bottom_blob_1d, gx + p.offset); - } - else if (psc(dims) == 2) - { - if (axis == 0) image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy + p.offset)); - if (axis == 1) image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx + p.offset, gy)); - } - else // if (psc(dims) == 3) - { - if (axis == 0) 
image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz + p.offset)); - if (axis == 1) image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy + p.offset, gz)); - if (axis == 2) image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx + p.offset, gy, gz)); - } -#else - const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); gxyz[psc(dims) - 1 - axis] += p.offset; +#if NCNN_image_shader + image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, gxyz); +#else + const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; + int v_offset = gxyz.z * psc(cstep) + gxyz.y * psc(w) + gxyz.x; buffer_cp1(top_blob_data, gi, bottom_blob_data, v_offset); diff --git a/src/layer/vulkan/shader/slice_pack1to4.comp b/src/layer/vulkan/shader/slice_pack1to4.comp index 55943e4d0..aac93abe1 100644 --- a/src/layer/vulkan/shader/slice_pack1to4.comp +++ b/src/layer/vulkan/shader/slice_pack1to4.comp @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -74,87 +70,67 @@ void main() if (gx >= psc(outw) || gy >= psc(outh) || gz >= psc(outc)) return; -#if NCNN_image_shader - if (psc(dims) == 1) - { - afpvec4 v; + ivec3 gxyz = ivec3(gx, gy, gz); - int gx4 = gx * 4 + p.offset; + gxyz[psc(dims) - 1] *= 4; + gxyz[psc(dims) - 1 - axis] += p.offset; - v.r = image1d_ld1(bottom_blob_1d, gx4 + 0); - v.g = 
image1d_ld1(bottom_blob_1d, gx4 + 1); - v.b = image1d_ld1(bottom_blob_1d, gx4 + 2); - v.a = image1d_ld1(bottom_blob_1d, gx4 + 3); +#if NCNN_image_shader + afpvec4 v; - image1d_st4(top_blob_1d, gx, v); + if (psc(dims) == 1) + { + v.r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 0, 0, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 1, 0, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 2, 0, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 3, 0, 0)); } else if (psc(dims) == 2) { - afpvec4 v; - if (axis == 0) { - int gy4 = gy * 4 + p.offset; - - v.r = image2d_ld1(bottom_blob_2d, ivec2(gx, gy4 + 0)); - v.g = image2d_ld1(bottom_blob_2d, ivec2(gx, gy4 + 1)); - v.b = image2d_ld1(bottom_blob_2d, ivec2(gx, gy4 + 2)); - v.a = image2d_ld1(bottom_blob_2d, ivec2(gx, gy4 + 3)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 2, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 3, 0)); } if (axis == 1) { - int gx4 = gx * 4 + p.offset; - - v.r = image2d_ld1(bottom_blob_2d, ivec2(gx4 + 0, gy)); - v.g = image2d_ld1(bottom_blob_2d, ivec2(gx4 + 1, gy)); - v.b = image2d_ld1(bottom_blob_2d, ivec2(gx4 + 2, gy)); - v.a = image2d_ld1(bottom_blob_2d, ivec2(gx4 + 3, gy)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 2, gxyz.y, 0)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 3, gxyz.y, 0)); } - - image2d_st4(top_blob_2d, ivec2(gx, gy), v); } else // if (psc(dims) == 3) { - afpvec4 v; - if (axis == 0) { - int gz4 = gz * 4 + p.offset; - - v.r = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz4 + 0)); - v.g = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz4 + 1)); - v.b = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz4 + 2)); - v.a = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz4 + 
3)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 2)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 3)); } if (axis == 1) { - int gy4 = gy * 4 + p.offset; - - v.r = image3d_ld1(bottom_blob_3d, ivec3(gx, gy4 + 0, gz)); - v.g = image3d_ld1(bottom_blob_3d, ivec3(gx, gy4 + 1, gz)); - v.b = image3d_ld1(bottom_blob_3d, ivec3(gx, gy4 + 2, gz)); - v.a = image3d_ld1(bottom_blob_3d, ivec3(gx, gy4 + 3, gz)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 2, gxyz.z)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 3, gxyz.z)); } if (axis == 2) { - int gx4 = gx * 4 + p.offset; - - v.r = image3d_ld1(bottom_blob_3d, ivec3(gx4 + 0, gy, gz)); - v.g = image3d_ld1(bottom_blob_3d, ivec3(gx4 + 1, gy, gz)); - v.b = image3d_ld1(bottom_blob_3d, ivec3(gx4 + 2, gy, gz)); - v.a = image3d_ld1(bottom_blob_3d, ivec3(gx4 + 3, gy, gz)); + v.r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z)); + v.g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z)); + v.b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 2, gxyz.y, gxyz.z)); + v.a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 3, gxyz.y, gxyz.z)); } - - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); } + + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); - - gxyz[psc(dims) - 1] *= 4; - gxyz[psc(dims) - 1 - axis] += p.offset; - int v_offset_0 = gxyz.z * psc(cstep) + gxyz.y * psc(w) + gxyz.x; ivec3 gxyz4 = ivec3(1, psc(w), psc(cstep)); diff --git a/src/layer/vulkan/shader/slice_pack1to8.comp b/src/layer/vulkan/shader/slice_pack1to8.comp index 5b6fe1093..39997827b 100644 --- 
a/src/layer/vulkan/shader/slice_pack1to8.comp +++ b/src/layer/vulkan/shader/slice_pack1to8.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; @@ -75,111 +71,91 @@ void main() if (gx >= psc(outw) || gy >= psc(outh) || gz >= psc(outc)) return; -#if NCNN_image_shader - if (psc(dims) == 1) - { - afpvec8 v; + ivec3 gxyz = ivec3(gx, gy, gz); - int gx8 = gx * 8 + p.offset; + gxyz[psc(dims) - 1] *= 8; + gxyz[psc(dims) - 1 - axis] += p.offset; - v[0].r = image1d_ld1(bottom_blob_1d, gx8 + 0); - v[0].g = image1d_ld1(bottom_blob_1d, gx8 + 1); - v[0].b = image1d_ld1(bottom_blob_1d, gx8 + 2); - v[0].a = image1d_ld1(bottom_blob_1d, gx8 + 3); - v[1].r = image1d_ld1(bottom_blob_1d, gx8 + 4); - v[1].g = image1d_ld1(bottom_blob_1d, gx8 + 5); - v[1].b = image1d_ld1(bottom_blob_1d, gx8 + 6); - v[1].a = image1d_ld1(bottom_blob_1d, gx8 + 7); +#if NCNN_image_shader + afpvec8 v; - image1d_st8(top_blob_1d, gx, v); + if (psc(dims) == 1) + { + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 0, 0, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 1, 0, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 2, 0, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 3, 0, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 4, 0, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 5, 0, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, 
ivec3(gxyz.x + 6, 0, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 7, 0, 0)); } else if (psc(dims) == 2) { - afpvec8 v; - if (axis == 0) { - int gy8 = gy * 8 + p.offset; - - v[0].r = image2d_ld1(bottom_blob_2d, ivec2(gx, gy8 + 0)); - v[0].g = image2d_ld1(bottom_blob_2d, ivec2(gx, gy8 + 1)); - v[0].b = image2d_ld1(bottom_blob_2d, ivec2(gx, gy8 + 2)); - v[0].a = image2d_ld1(bottom_blob_2d, ivec2(gx, gy8 + 3)); - v[1].r = image2d_ld1(bottom_blob_2d, ivec2(gx, gy8 + 4)); - v[1].g = image2d_ld1(bottom_blob_2d, ivec2(gx, gy8 + 5)); - v[1].b = image2d_ld1(bottom_blob_2d, ivec2(gx, gy8 + 6)); - v[1].a = image2d_ld1(bottom_blob_2d, ivec2(gx, gy8 + 7)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 2, 0)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 3, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 4, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 5, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 6, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 7, 0)); } if (axis == 1) { - int gx8 = gx * 8 + p.offset; - - v[0].r = image2d_ld1(bottom_blob_2d, ivec2(gx8 + 0, gy)); - v[0].g = image2d_ld1(bottom_blob_2d, ivec2(gx8 + 1, gy)); - v[0].b = image2d_ld1(bottom_blob_2d, ivec2(gx8 + 2, gy)); - v[0].a = image2d_ld1(bottom_blob_2d, ivec2(gx8 + 3, gy)); - v[1].r = image2d_ld1(bottom_blob_2d, ivec2(gx8 + 4, gy)); - v[1].g = image2d_ld1(bottom_blob_2d, ivec2(gx8 + 5, gy)); - v[1].b = image2d_ld1(bottom_blob_2d, ivec2(gx8 + 6, gy)); - v[1].a = image2d_ld1(bottom_blob_2d, ivec2(gx8 + 7, gy)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 2, gxyz.y, 0)); + v[0].a = 
image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 3, gxyz.y, 0)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 4, gxyz.y, 0)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 5, gxyz.y, 0)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 6, gxyz.y, 0)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 7, gxyz.y, 0)); } - - image2d_st8(top_blob_2d, ivec2(gx, gy), v); } else // if (psc(dims) == 3) { - afpvec8 v; - if (axis == 0) { - int gz8 = gz * 8 + p.offset; - - v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz8 + 0)); - v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz8 + 1)); - v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz8 + 2)); - v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz8 + 3)); - v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz8 + 4)); - v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz8 + 5)); - v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz8 + 6)); - v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz8 + 7)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 2)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 3)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 4)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 5)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 6)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 7)); } if (axis == 1) { - int gy8 = gy * 8 + p.offset; - - v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gx, gy8 + 0, gz)); - v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gx, gy8 + 1, gz)); - v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gx, gy8 + 2, gz)); - v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gx, gy8 + 3, gz)); - v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gx, gy8 + 4, gz)); - v[1].g = 
image3d_ld1(bottom_blob_3d, ivec3(gx, gy8 + 5, gz)); - v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gx, gy8 + 6, gz)); - v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gx, gy8 + 7, gz)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 2, gxyz.z)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 3, gxyz.z)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 4, gxyz.z)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 5, gxyz.z)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 6, gxyz.z)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 7, gxyz.z)); } if (axis == 2) { - int gx8 = gx * 8 + p.offset; - - v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gx8 + 0, gy, gz)); - v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gx8 + 1, gy, gz)); - v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gx8 + 2, gy, gz)); - v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gx8 + 3, gy, gz)); - v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gx8 + 4, gy, gz)); - v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gx8 + 5, gy, gz)); - v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gx8 + 6, gy, gz)); - v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gx8 + 7, gy, gz)); + v[0].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z)); + v[0].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z)); + v[0].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 2, gxyz.y, gxyz.z)); + v[0].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 3, gxyz.y, gxyz.z)); + v[1].r = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 4, gxyz.y, gxyz.z)); + v[1].g = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 5, gxyz.y, gxyz.z)); + v[1].b = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 6, gxyz.y, gxyz.z)); + v[1].a = image3d_ld1(bottom_blob_3d, ivec3(gxyz.x + 7, gxyz.y, gxyz.z)); } - - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); 
} + + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); - - gxyz[psc(dims) - 1] *= 8; - gxyz[psc(dims) - 1 - axis] += p.offset; - int v_offset_0 = gxyz.z * psc(cstep) + gxyz.y * psc(w) + gxyz.x; ivec3 gxyz4 = ivec3(1, psc(w), psc(cstep)); diff --git a/src/layer/vulkan/shader/slice_pack4.comp b/src/layer/vulkan/shader/slice_pack4.comp index 71bbb078f..3d42adf61 100644 --- a/src/layer/vulkan/shader/slice_pack4.comp +++ b/src/layer/vulkan/shader/slice_pack4.comp @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; @@ -74,29 +70,15 @@ void main() if (gx >= psc(outw) || gy >= psc(outh) || gz >= psc(outc)) return; -#if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp4(top_blob_1d, gx, bottom_blob_1d, gx + p.offset); - } - else if (psc(dims) == 2) - { - if (axis == 0) image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy + p.offset)); - if (axis == 1) image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx + p.offset, gy)); - } - else // if (psc(dims) == 3) - { - if (axis == 0) image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz + p.offset)); - if (axis == 1) image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy + p.offset, gz)); - if (axis == 2) image3d_cp4(top_blob_3d, ivec3(gx, gy, 
gz), bottom_blob_3d, ivec3(gx + p.offset, gy, gz)); - } -#else - const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); gxyz[psc(dims) - 1 - axis] += p.offset; +#if NCNN_image_shader + image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, gxyz); +#else + const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; + int v_offset = gxyz.z * psc(cstep) + gxyz.y * psc(w) + gxyz.x; buffer_cp4(top_blob_data, gi, bottom_blob_data, v_offset); diff --git a/src/layer/vulkan/shader/slice_pack4to8.comp b/src/layer/vulkan/shader/slice_pack4to8.comp index ca6bf2513..9143fa609 100644 --- a/src/layer/vulkan/shader/slice_pack4to8.comp +++ b/src/layer/vulkan/shader/slice_pack4to8.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; @@ -75,75 +71,55 @@ void main() if (gx >= psc(outw) || gy >= psc(outh) || gz >= psc(outc)) return; -#if NCNN_image_shader - if (psc(dims) == 1) - { - afpvec8 v; + ivec3 gxyz = ivec3(gx, gy, gz); - int gx2 = gx * 2 + p.offset; + gxyz[psc(dims) - 1] *= 2; + gxyz[psc(dims) - 1 - axis] += p.offset; - v[0] = image1d_ld4(bottom_blob_1d, gx2 + 0); - v[1] = image1d_ld4(bottom_blob_1d, gx2 + 1); +#if NCNN_image_shader + afpvec8 v; - image1d_st8(top_blob_1d, gx, v); + if (psc(dims) == 1) + { + v[0] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x + 0, 0, 0)); + v[1] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x + 
1, 0, 0)); } else if (psc(dims) == 2) { - afpvec8 v; - if (axis == 0) { - int gy2 = gy * 2 + p.offset; - - v[0] = image2d_ld4(bottom_blob_2d, ivec2(gx, gy2 + 0)); - v[1] = image2d_ld4(bottom_blob_2d, ivec2(gx, gy2 + 1)); + v[0] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0)); + v[1] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0)); } if (axis == 1) { - int gx2 = gx * 2 + p.offset; - - v[0] = image2d_ld4(bottom_blob_2d, ivec2(gx2 + 0, gy)); - v[1] = image2d_ld4(bottom_blob_2d, ivec2(gx2 + 1, gy)); + v[0] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0)); + v[1] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0)); } - - image2d_st8(top_blob_2d, ivec2(gx, gy), v); } else // if (psc(dims) == 3) { - afpvec8 v; - if (axis == 0) { - int gz2 = gz * 2 + p.offset; - - v[0] = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz2 + 0)); - v[1] = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz2 + 1)); + v[0] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0)); + v[1] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1)); } if (axis == 1) { - int gy2 = gy * 2 + p.offset; - - v[0] = image3d_ld4(bottom_blob_3d, ivec3(gx, gy2 + 0, gz)); - v[1] = image3d_ld4(bottom_blob_3d, ivec3(gx, gy2 + 1, gz)); + v[0] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z)); + v[1] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z)); } if (axis == 2) { - int gx2 = gx * 2 + p.offset; - - v[0] = image3d_ld4(bottom_blob_3d, ivec3(gx2 + 0, gy, gz)); - v[1] = image3d_ld4(bottom_blob_3d, ivec3(gx2 + 1, gy, gz)); + v[0] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z)); + v[1] = image3d_ld4(bottom_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z)); } - - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); } + + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); - - gxyz[psc(dims) - 1] *= 2; - gxyz[psc(dims) - 1 - axis] += p.offset; 
- int v_offset_0 = gxyz.z * psc(cstep) + gxyz.y * psc(w) + gxyz.x; ivec3 gxyz4 = ivec3(1, psc(w), psc(cstep)); diff --git a/src/layer/vulkan/shader/slice_pack8.comp b/src/layer/vulkan/shader/slice_pack8.comp index 7b796617a..2d09f5c5e 100644 --- a/src/layer/vulkan/shader/slice_pack8.comp +++ b/src/layer/vulkan/shader/slice_pack8.comp @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; @@ -75,29 +71,15 @@ void main() if (gx >= psc(outw) || gy >= psc(outh) || gz >= psc(outc)) return; -#if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_cp8(top_blob_1d, gx, bottom_blob_1d, gx + p.offset); - } - else if (psc(dims) == 2) - { - if (axis == 0) image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy + p.offset)); - if (axis == 1) image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx + p.offset, gy)); - } - else // if (psc(dims) == 3) - { - if (axis == 0) image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz + p.offset)); - if (axis == 1) image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy + p.offset, gz)); - if (axis == 2) image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx + p.offset, gy, gz)); - } -#else - const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; - ivec3 gxyz = ivec3(gx, gy, gz); gxyz[psc(dims) - 1 - axis] += p.offset; +#if NCNN_image_shader + 
image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, gxyz); +#else + const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; + int v_offset = gxyz.z * psc(cstep) + gxyz.y * psc(w) + gxyz.x; buffer_cp8(top_blob_data, gi, bottom_blob_data, v_offset); diff --git a/src/layer/vulkan/shader/softmax_div_sum.comp b/src/layer/vulkan/shader/softmax_div_sum.comp index f2e300cb3..f48fe8bf7 100644 --- a/src/layer/vulkan/shader/softmax_div_sum.comp +++ b/src/layer/vulkan/shader/softmax_div_sum.comp @@ -37,14 +37,9 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D sum_workspace_1d; -layout (binding = 2) uniform unfp sampler2D sum_workspace_2d; +layout (binding = 2) uniform unfp sampler3D sum_workspace_3d; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer sum_workspace { sfp sum_workspace_data[]; }; @@ -80,33 +75,33 @@ void main() if (psc(dims) == 1) // axis == 0 { - v = image1d_ld1(bottom_blob_1d, gx); - sum = image1d_ld1(sum_workspace_1d, 0); + v = image3d_ld1(bottom_blob_3d, ivec3(gx, 0, 0)); + sum = image3d_ld1(sum_workspace_3d, ivec3(0, 0, 0)); } else if (psc(dims) == 2 && axis == 0) { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - sum = image1d_ld1(sum_workspace_1d, gx); + v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, 0)); + sum = image3d_ld1(sum_workspace_3d, ivec3(gx, 0, 0)); } else if (psc(dims) == 2 && axis == 1) { - v = 
image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - sum = image1d_ld1(sum_workspace_1d, gy); + v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, 0)); + sum = image3d_ld1(sum_workspace_3d, ivec3(gy, 0, 0)); } else if (psc(dims) == 3 && axis == 0) { v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - sum = image2d_ld1(sum_workspace_2d, ivec2(gx, gy)); + sum = image3d_ld1(sum_workspace_3d, ivec3(gx, gy, 0)); } else if (psc(dims) == 3 && axis == 1) { v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - sum = image2d_ld1(sum_workspace_2d, ivec2(gx, gz)); + sum = image3d_ld1(sum_workspace_3d, ivec3(gx, gz, 0)); } else if (psc(dims) == 3 && axis == 2) { v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - sum = image2d_ld1(sum_workspace_2d, ivec2(gy, gz)); + sum = image3d_ld1(sum_workspace_3d, ivec3(gy, gz, 0)); } #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -144,18 +139,7 @@ void main() v /= sum; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/softmax_div_sum_pack4.comp b/src/layer/vulkan/shader/softmax_div_sum_pack4.comp index 71ae74389..58161014d 100644 --- a/src/layer/vulkan/shader/softmax_div_sum_pack4.comp +++ b/src/layer/vulkan/shader/softmax_div_sum_pack4.comp @@ -37,14 +37,9 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout 
(binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D sum_workspace_1d; -layout (binding = 2) uniform unfp sampler2D sum_workspace_2d; +layout (binding = 2) uniform unfp sampler3D sum_workspace_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer sum_workspace { sfpvec4 sum_workspace_data[]; }; @@ -80,33 +75,33 @@ void main() if (psc(dims) == 1) // axis == 0 { - v = image1d_ld4(bottom_blob_1d, gx); - sum = image1d_ld4(sum_workspace_1d, 0); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, 0, 0)); + sum = image3d_ld4(sum_workspace_3d, ivec3(0, 0, 0)); } else if (psc(dims) == 2 && axis == 0) { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - sum = image1d_ld4(sum_workspace_1d, gx); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0)); + sum = image3d_ld4(sum_workspace_3d, ivec3(gx, 0, 0)); } else if (psc(dims) == 2 && axis == 1) { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - sum = image1d_ld4(sum_workspace_1d, gy); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0)); + sum = image3d_ld4(sum_workspace_3d, ivec3(gy, 0, 0)); } else if (psc(dims) == 3 && axis == 0) { v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - sum = image2d_ld4(sum_workspace_2d, ivec2(gx, gy)); + sum = image3d_ld4(sum_workspace_3d, ivec3(gx, gy, 0)); } else if (psc(dims) == 3 && axis == 1) { v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - sum = image2d_ld4(sum_workspace_2d, ivec2(gx, gz)); + sum = image3d_ld4(sum_workspace_3d, ivec3(gx, gz, 0)); } else if (psc(dims) == 3 && axis == 2) { v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - sum = image2d_ld4(sum_workspace_2d, ivec2(gy, gz)); + sum = image3d_ld4(sum_workspace_3d, ivec3(gy, gz, 0)); } #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -153,18 +148,7 @@ void main() v /= sum; #if NCNN_image_shader - if 
(psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/softmax_div_sum_pack8.comp b/src/layer/vulkan/shader/softmax_div_sum_pack8.comp index f120c0898..b5c6f063b 100644 --- a/src/layer/vulkan/shader/softmax_div_sum_pack8.comp +++ b/src/layer/vulkan/shader/softmax_div_sum_pack8.comp @@ -38,14 +38,9 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D sum_workspace_1d; -layout (binding = 2) uniform unfp sampler2D sum_workspace_2d; +layout (binding = 2) uniform unfp sampler3D sum_workspace_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer sum_workspace { sfpvec8 sum_workspace_data[]; }; @@ -81,33 +76,33 @@ void main() if (psc(dims) == 1) // axis == 0 { - v = image1d_ld8(bottom_blob_1d, gx); - sum = image1d_ld8(sum_workspace_1d, 0); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, 0, 0)); + sum = image3d_ld8(sum_workspace_3d, ivec3(0, 0, 0)); } else if (psc(dims) == 2 && axis == 0) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - sum = image1d_ld8(sum_workspace_1d, gx); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); + 
sum = image3d_ld8(sum_workspace_3d, ivec3(gx, 0, 0)); } else if (psc(dims) == 2 && axis == 1) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - sum = image1d_ld8(sum_workspace_1d, gy); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); + sum = image3d_ld8(sum_workspace_3d, ivec3(gy, 0, 0)); } else if (psc(dims) == 3 && axis == 0) { v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - sum = image2d_ld8(sum_workspace_2d, ivec2(gx, gy)); + sum = image3d_ld8(sum_workspace_3d, ivec3(gx, gy, 0)); } else if (psc(dims) == 3 && axis == 1) { v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - sum = image2d_ld8(sum_workspace_2d, ivec2(gx, gz)); + sum = image3d_ld8(sum_workspace_3d, ivec3(gx, gz, 0)); } else if (psc(dims) == 3 && axis == 2) { v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - sum = image2d_ld8(sum_workspace_2d, ivec2(gy, gz)); + sum = image3d_ld8(sum_workspace_3d, ivec3(gy, gz, 0)); } #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -155,18 +150,7 @@ void main() v[1] /= sum[1]; #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/softmax_exp_sub_max.comp b/src/layer/vulkan/shader/softmax_exp_sub_max.comp index 04c5115a7..e3b59951f 100644 --- a/src/layer/vulkan/shader/softmax_exp_sub_max.comp +++ b/src/layer/vulkan/shader/softmax_exp_sub_max.comp @@ -37,14 +37,9 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D 
bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D max_workspace_1d; -layout (binding = 2) uniform unfp sampler2D max_workspace_2d; +layout (binding = 2) uniform unfp sampler3D max_workspace_3d; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer max_workspace { sfp max_workspace_data[]; }; @@ -80,33 +75,33 @@ void main() if (psc(dims) == 1) // axis == 0 { - v = image1d_ld1(bottom_blob_1d, gx); - max_value = image1d_ld1(max_workspace_1d, 0); + v = image3d_ld1(bottom_blob_3d, ivec3(gx, 0, 0)); + max_value = image3d_ld1(max_workspace_3d, ivec3(0, 0, 0)); } else if (psc(dims) == 2 && axis == 0) { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - max_value = image1d_ld1(max_workspace_1d, gx); + v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, 0)); + max_value = image3d_ld1(max_workspace_3d, ivec3(gx, 0, 0)); } else if (psc(dims) == 2 && axis == 1) { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - max_value = image1d_ld1(max_workspace_1d, gy); + v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, 0)); + max_value = image3d_ld1(max_workspace_3d, ivec3(gy, 0, 0)); } else if (psc(dims) == 3 && axis == 0) { v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - max_value = image2d_ld1(max_workspace_2d, ivec2(gx, gy)); + max_value = image3d_ld1(max_workspace_3d, ivec3(gx, gy, 0)); } else if (psc(dims) == 3 && axis == 1) { v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - max_value = image2d_ld1(max_workspace_2d, ivec2(gx, gz)); + max_value = image3d_ld1(max_workspace_3d, ivec3(gx, gz, 0)); } else if (psc(dims) == 3 && axis == 2) { v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - max_value = image2d_ld1(max_workspace_2d, ivec2(gy, gz)); + max_value = 
image3d_ld1(max_workspace_3d, ivec3(gy, gz, 0)); } #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -144,18 +139,7 @@ void main() v = exp(v - max_value); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/softmax_exp_sub_max_pack4.comp b/src/layer/vulkan/shader/softmax_exp_sub_max_pack4.comp index d99d2af5e..2ea8d9cb1 100644 --- a/src/layer/vulkan/shader/softmax_exp_sub_max_pack4.comp +++ b/src/layer/vulkan/shader/softmax_exp_sub_max_pack4.comp @@ -37,14 +37,9 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D max_workspace_1d; -layout (binding = 2) uniform unfp sampler2D max_workspace_2d; +layout (binding = 2) uniform unfp sampler3D max_workspace_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer max_workspace { sfpvec4 max_workspace_data[]; }; @@ -80,33 +75,33 @@ void main() if (psc(dims) == 1) // axis == 0 { - v = image1d_ld4(bottom_blob_1d, gx); - max_value = image1d_ld4(max_workspace_1d, 0); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, 0, 0)); + max_value = 
image3d_ld4(max_workspace_3d, ivec3(0, 0, 0)); } else if (psc(dims) == 2 && axis == 0) { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - max_value = image1d_ld4(max_workspace_1d, gx); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0)); + max_value = image3d_ld4(max_workspace_3d, ivec3(gx, 0, 0)); } else if (psc(dims) == 2 && axis == 1) { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - max_value = image1d_ld4(max_workspace_1d, gy); + v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0)); + max_value = image3d_ld4(max_workspace_3d, ivec3(gy, 0, 0)); } else if (psc(dims) == 3 && axis == 0) { v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - max_value = image2d_ld4(max_workspace_2d, ivec2(gx, gy)); + max_value = image3d_ld4(max_workspace_3d, ivec3(gx, gy, 0)); } else if (psc(dims) == 3 && axis == 1) { v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - max_value = image2d_ld4(max_workspace_2d, ivec2(gx, gz)); + max_value = image3d_ld4(max_workspace_3d, ivec3(gx, gz, 0)); } else if (psc(dims) == 3 && axis == 2) { v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - max_value = image2d_ld4(max_workspace_2d, ivec2(gy, gz)); + max_value = image3d_ld4(max_workspace_3d, ivec3(gy, gz, 0)); } #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -153,18 +148,7 @@ void main() v = exp(v - max_value); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/softmax_exp_sub_max_pack8.comp b/src/layer/vulkan/shader/softmax_exp_sub_max_pack8.comp index e03f38717..1bfb04236 100644 --- a/src/layer/vulkan/shader/softmax_exp_sub_max_pack8.comp +++ b/src/layer/vulkan/shader/softmax_exp_sub_max_pack8.comp @@ -38,14 +38,9 @@ layout 
(constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; -layout (binding = 2) uniform unfp sampler1D max_workspace_1d; -layout (binding = 2) uniform unfp sampler2D max_workspace_2d; +layout (binding = 2) uniform unfp sampler3D max_workspace_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; layout (binding = 1) readonly buffer max_workspace { sfpvec8 max_workspace_data[]; }; @@ -81,33 +76,33 @@ void main() if (psc(dims) == 1) // axis == 0 { - v = image1d_ld8(bottom_blob_1d, gx); - max_value = image1d_ld8(max_workspace_1d, 0); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, 0, 0)); + max_value = image3d_ld8(max_workspace_3d, ivec3(0, 0, 0)); } else if (psc(dims) == 2 && axis == 0) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - max_value = image1d_ld8(max_workspace_1d, gx); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); + max_value = image3d_ld8(max_workspace_3d, ivec3(gx, 0, 0)); } else if (psc(dims) == 2 && axis == 1) { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - max_value = image1d_ld8(max_workspace_1d, gy); + v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, 0)); + max_value = image3d_ld8(max_workspace_3d, ivec3(gy, 0, 0)); } else if (psc(dims) == 3 && axis == 0) { v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - max_value = image2d_ld8(max_workspace_2d, ivec2(gx, gy)); + max_value = image3d_ld8(max_workspace_3d, ivec3(gx, gy, 0)); } else if (psc(dims) == 3 && axis == 1) { v = 
image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - max_value = image2d_ld8(max_workspace_2d, ivec2(gx, gz)); + max_value = image3d_ld8(max_workspace_3d, ivec3(gx, gz, 0)); } else if (psc(dims) == 3 && axis == 2) { v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - max_value = image2d_ld8(max_workspace_2d, ivec2(gy, gz)); + max_value = image3d_ld8(max_workspace_3d, ivec3(gy, gz, 0)); } #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -155,18 +150,7 @@ void main() v[1] = exp(v[1] - max_value[1]); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/softmax_reduce_max.comp b/src/layer/vulkan/shader/softmax_reduce_max.comp index af43aa5e8..3222d1491 100644 --- a/src/layer/vulkan/shader/softmax_reduce_max.comp +++ b/src/layer/vulkan/shader/softmax_reduce_max.comp @@ -37,11 +37,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_top_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_top_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_top_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D max_workspace_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D max_workspace_2d; +layout (binding = 1, imfmtc1) writeonly uniform unfp image3D max_workspace_3d; #else layout (binding = 0) readonly buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; layout (binding = 1) writeonly buffer max_workspace { sfp max_workspace_data[]; }; @@ -78,14 +75,14 @@ void main() for (int i = 0; i < psc(w); i++) { #if 
NCNN_image_shader - afp v = image1d_ld1(bottom_top_blob_1d, i); + afp v = image3d_ld1(bottom_top_blob_3d, ivec3(i, 0, 0)); #else afp v = buffer_ld1(bottom_top_blob_data, i); #endif max_value = max(max_value, v); } #if NCNN_image_shader - image1d_st1(max_workspace_1d, 0, max_value); + image3d_st1(max_workspace_3d, ivec3(0, 0, 0), max_value); #else buffer_st1(max_workspace_data, 0, max_value); #endif @@ -97,7 +94,7 @@ void main() for (int i = 0; i < psc(h); i++) { #if NCNN_image_shader - afp v = image2d_ld1(bottom_top_blob_2d, ivec2(gx, i)); + afp v = image3d_ld1(bottom_top_blob_3d, ivec3(gx, i, 0)); #else int v_offset = i * psc(w) + gx; afp v = buffer_ld1(bottom_top_blob_data, v_offset); @@ -105,7 +102,7 @@ void main() max_value = max(max_value, v); } #if NCNN_image_shader - image1d_st1(max_workspace_1d, gx, max_value); + image3d_st1(max_workspace_3d, ivec3(gx, 0, 0), max_value); #else buffer_st1(max_workspace_data, gx, max_value); #endif @@ -117,7 +114,7 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afp v = image2d_ld1(bottom_top_blob_2d, ivec2(i, gx)); + afp v = image3d_ld1(bottom_top_blob_3d, ivec3(i, gx, 0)); #else int v_offset = gx * psc(w) + i; afp v = buffer_ld1(bottom_top_blob_data, v_offset); @@ -125,7 +122,7 @@ void main() max_value = max(max_value, v); } #if NCNN_image_shader - image1d_st1(max_workspace_1d, gx, max_value); + image3d_st1(max_workspace_3d, ivec3(gx, 0, 0), max_value); #else buffer_st1(max_workspace_data, gx, max_value); #endif @@ -145,7 +142,7 @@ void main() max_value = max(max_value, v); } #if NCNN_image_shader - image2d_st1(max_workspace_2d, ivec2(gx, gy), max_value); + image3d_st1(max_workspace_3d, ivec3(gx, gy, 0), max_value); #else buffer_st1(max_workspace_data, gy * psc(w) + gx, max_value); #endif @@ -165,7 +162,7 @@ void main() max_value = max(max_value, v); } #if NCNN_image_shader - image2d_st1(max_workspace_2d, ivec2(gx, gy), max_value); + image3d_st1(max_workspace_3d, ivec3(gx, gy, 0), max_value); #else 
buffer_st1(max_workspace_data, gy * psc(w) + gx, max_value); #endif @@ -185,7 +182,7 @@ void main() max_value = max(max_value, v); } #if NCNN_image_shader - image2d_st1(max_workspace_2d, ivec2(gx, gy), max_value); + image3d_st1(max_workspace_3d, ivec3(gx, gy, 0), max_value); #else buffer_st1(max_workspace_data, gy * psc(h) + gx, max_value); #endif diff --git a/src/layer/vulkan/shader/softmax_reduce_max_pack4.comp b/src/layer/vulkan/shader/softmax_reduce_max_pack4.comp index 57387da51..82a8726cc 100644 --- a/src/layer/vulkan/shader/softmax_reduce_max_pack4.comp +++ b/src/layer/vulkan/shader/softmax_reduce_max_pack4.comp @@ -37,11 +37,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_top_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_top_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_top_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D max_workspace_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D max_workspace_2d; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D max_workspace_3d; #else layout (binding = 0) readonly buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (binding = 1) writeonly buffer max_workspace { sfpvec4 max_workspace_data[]; }; @@ -78,7 +75,7 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afpvec4 v = image1d_ld4(bottom_top_blob_1d, i); + afpvec4 v = image3d_ld4(bottom_top_blob_3d, ivec3(i, 0, 0)); #else afpvec4 v = buffer_ld4(bottom_top_blob_data, i); #endif @@ -87,7 +84,7 @@ void main() afpvec2 max2 = max(max_value.rg, max_value.ba); max_value = afpvec4(max(max2.r, max2.g)); #if NCNN_image_shader - image1d_st4(max_workspace_1d, 0, max_value); + image3d_st4(max_workspace_3d, ivec3(0, 0, 0), max_value); #else buffer_st4(max_workspace_data, 0, max_value); 
#endif @@ -99,7 +96,7 @@ void main() for (int i = 0; i < psc(h); i++) { #if NCNN_image_shader - afpvec4 v = image2d_ld4(bottom_top_blob_2d, ivec2(gx, i)); + afpvec4 v = image3d_ld4(bottom_top_blob_3d, ivec3(gx, i, 0)); #else int v_offset = i * psc(w) + gx; afpvec4 v = buffer_ld4(bottom_top_blob_data, v_offset); @@ -109,7 +106,7 @@ void main() afpvec2 max2 = max(max_value.rg, max_value.ba); max_value = afpvec4(max(max2.r, max2.g)); #if NCNN_image_shader - image1d_st4(max_workspace_1d, gx, max_value); + image3d_st4(max_workspace_3d, ivec3(gx, 0, 0), max_value); #else buffer_st4(max_workspace_data, gx, max_value); #endif @@ -121,7 +118,7 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afpvec4 v = image2d_ld4(bottom_top_blob_2d, ivec2(i, gx)); + afpvec4 v = image3d_ld4(bottom_top_blob_3d, ivec3(i, gx, 0)); #else int v_offset = gx * psc(w) + i; afpvec4 v = buffer_ld4(bottom_top_blob_data, v_offset); @@ -129,7 +126,7 @@ void main() max_value = max(max_value, v); } #if NCNN_image_shader - image1d_st4(max_workspace_1d, gx, max_value); + image3d_st4(max_workspace_3d, ivec3(gx, 0, 0), max_value); #else buffer_st4(max_workspace_data, gx, max_value); #endif @@ -151,7 +148,7 @@ void main() afpvec2 max2 = max(max_value.rg, max_value.ba); max_value = afpvec4(max(max2.r, max2.g)); #if NCNN_image_shader - image2d_st4(max_workspace_2d, ivec2(gx, gy), max_value); + image3d_st4(max_workspace_3d, ivec3(gx, gy, 0), max_value); #else buffer_st4(max_workspace_data, gy * psc(w) + gx, max_value); #endif @@ -171,7 +168,7 @@ void main() max_value = max(max_value, v); } #if NCNN_image_shader - image2d_st4(max_workspace_2d, ivec2(gx, gy), max_value); + image3d_st4(max_workspace_3d, ivec3(gx, gy, 0), max_value); #else buffer_st4(max_workspace_data, gy * psc(w) + gx, max_value); #endif @@ -191,7 +188,7 @@ void main() max_value = max(max_value, v); } #if NCNN_image_shader - image2d_st4(max_workspace_2d, ivec2(gx, gy), max_value); + image3d_st4(max_workspace_3d, ivec3(gx, 
gy, 0), max_value); #else buffer_st4(max_workspace_data, gy * psc(h) + gx, max_value); #endif diff --git a/src/layer/vulkan/shader/softmax_reduce_max_pack8.comp b/src/layer/vulkan/shader/softmax_reduce_max_pack8.comp index 5a2b02380..8ab82fc26 100644 --- a/src/layer/vulkan/shader/softmax_reduce_max_pack8.comp +++ b/src/layer/vulkan/shader/softmax_reduce_max_pack8.comp @@ -38,11 +38,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_top_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_top_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_top_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D max_workspace_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D max_workspace_2d; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D max_workspace_3d; #else layout (binding = 0) readonly buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; layout (binding = 1) writeonly buffer max_workspace { sfpvec8 max_workspace_data[]; }; @@ -79,7 +76,7 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afpvec8 v = image1d_ld8(bottom_top_blob_1d, i); + afpvec8 v = image3d_ld8(bottom_top_blob_3d, ivec3(i, 0, 0)); #else afpvec8 v = buffer_ld8(bottom_top_blob_data, i); #endif @@ -91,7 +88,7 @@ void main() afp max1 = max(max2.r, max2.g); max_value = afpvec8(afpvec4(max1), afpvec4(max1)); #if NCNN_image_shader - image1d_st8(max_workspace_1d, 0, max_value); + image3d_st8(max_workspace_3d, ivec3(0, 0, 0), max_value); #else buffer_st8(max_workspace_data, 0, max_value); #endif @@ -103,7 +100,7 @@ void main() for (int i = 0; i < psc(h); i++) { #if NCNN_image_shader - afpvec8 v = image2d_ld8(bottom_top_blob_2d, ivec2(gx, i)); + afpvec8 v = image3d_ld8(bottom_top_blob_3d, ivec3(gx, i, 0)); #else int v_offset = i * psc(w) + gx; afpvec8 
v = buffer_ld8(bottom_top_blob_data, v_offset); @@ -116,7 +113,7 @@ void main() afp max1 = max(max2.r, max2.g); max_value = afpvec8(afpvec4(max1), afpvec4(max1)); #if NCNN_image_shader - image1d_st8(max_workspace_1d, gx, max_value); + image3d_st8(max_workspace_3d, ivec3(gx, 0, 0), max_value); #else buffer_st8(max_workspace_data, gx, max_value); #endif @@ -128,7 +125,7 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afpvec8 v = image2d_ld8(bottom_top_blob_2d, ivec2(i, gx)); + afpvec8 v = image3d_ld8(bottom_top_blob_3d, ivec3(i, gx, 0)); #else int v_offset = gx * psc(w) + i; afpvec8 v = buffer_ld8(bottom_top_blob_data, v_offset); @@ -137,7 +134,7 @@ void main() max_value[1] = max(max_value[1], v[1]); } #if NCNN_image_shader - image1d_st8(max_workspace_1d, gx, max_value); + image3d_st8(max_workspace_3d, ivec3(gx, 0, 0), max_value); #else buffer_st8(max_workspace_data, gx, max_value); #endif @@ -162,7 +159,7 @@ void main() afp max1 = max(max2.r, max2.g); max_value = afpvec8(afpvec4(max1), afpvec4(max1)); #if NCNN_image_shader - image2d_st8(max_workspace_2d, ivec2(gx, gy), max_value); + image3d_st8(max_workspace_3d, ivec3(gx, gy, 0), max_value); #else buffer_st8(max_workspace_data, gy * psc(w) + gx, max_value); #endif @@ -183,7 +180,7 @@ void main() max_value[1] = max(max_value[1], v[1]); } #if NCNN_image_shader - image2d_st8(max_workspace_2d, ivec2(gx, gy), max_value); + image3d_st8(max_workspace_3d, ivec3(gx, gy, 0), max_value); #else buffer_st8(max_workspace_data, gy * psc(w) + gx, max_value); #endif @@ -204,7 +201,7 @@ void main() max_value[1] = max(max_value[1], v[1]); } #if NCNN_image_shader - image2d_st8(max_workspace_2d, ivec2(gx, gy), max_value); + image3d_st8(max_workspace_3d, ivec3(gx, gy, 0), max_value); #else buffer_st8(max_workspace_data, gy * psc(h) + gx, max_value); #endif diff --git a/src/layer/vulkan/shader/softmax_reduce_sum.comp b/src/layer/vulkan/shader/softmax_reduce_sum.comp index 7537532ac..ce61ca2c5 100644 --- 
a/src/layer/vulkan/shader/softmax_reduce_sum.comp +++ b/src/layer/vulkan/shader/softmax_reduce_sum.comp @@ -37,11 +37,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_top_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_top_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_top_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D sum_workspace_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D sum_workspace_2d; +layout (binding = 1, imfmtc1) writeonly uniform unfp image3D sum_workspace_3d; #else layout (binding = 0) readonly buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; layout (binding = 1) writeonly buffer sum_workspace { sfp sum_workspace_data[]; }; @@ -78,14 +75,14 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afp v = image1d_ld1(bottom_top_blob_1d, i); + afp v = image3d_ld1(bottom_top_blob_3d, ivec3(i, 0, 0)); #else afp v = buffer_ld1(bottom_top_blob_data, i); #endif sum_value += v; } #if NCNN_image_shader - image1d_st1(sum_workspace_1d, 0, sum_value); + image3d_st1(sum_workspace_3d, ivec3(0, 0, 0), sum_value); #else buffer_st1(sum_workspace_data, 0, sum_value); #endif @@ -97,7 +94,7 @@ void main() for (int i = 0; i < psc(h); i++) { #if NCNN_image_shader - afp v = image2d_ld1(bottom_top_blob_2d, ivec2(gx, i)); + afp v = image3d_ld1(bottom_top_blob_3d, ivec3(gx, i, 0)); #else int v_offset = i * psc(w) + gx; afp v = buffer_ld1(bottom_top_blob_data, v_offset); @@ -105,7 +102,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image1d_st1(sum_workspace_1d, gx, sum_value); + image3d_st1(sum_workspace_3d, ivec3(gx, 0, 0), sum_value); #else buffer_st1(sum_workspace_data, gx, sum_value); #endif @@ -117,7 +114,7 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afp v = 
image2d_ld1(bottom_top_blob_2d, ivec2(i, gx)); + afp v = image3d_ld1(bottom_top_blob_3d, ivec3(i, gx, 0)); #else int v_offset = gx * psc(w) + i; afp v = buffer_ld1(bottom_top_blob_data, v_offset); @@ -125,7 +122,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image1d_st1(sum_workspace_1d, gx, sum_value); + image3d_st1(sum_workspace_3d, ivec3(gx, 0, 0), sum_value); #else buffer_st1(sum_workspace_data, gx, sum_value); #endif @@ -145,7 +142,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image2d_st1(sum_workspace_2d, ivec2(gx, gy), sum_value); + image3d_st1(sum_workspace_3d, ivec3(gx, gy, 0), sum_value); #else buffer_st1(sum_workspace_data, gy * psc(w) + gx, sum_value); #endif @@ -165,7 +162,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image2d_st1(sum_workspace_2d, ivec2(gx, gy), sum_value); + image3d_st1(sum_workspace_3d, ivec3(gx, gy, 0), sum_value); #else buffer_st1(sum_workspace_data, gy * psc(w) + gx, sum_value); #endif @@ -185,7 +182,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image2d_st1(sum_workspace_2d, ivec2(gx, gy), sum_value); + image3d_st1(sum_workspace_3d, ivec3(gx, gy, 0), sum_value); #else buffer_st1(sum_workspace_data, gy * psc(h) + gx, sum_value); #endif diff --git a/src/layer/vulkan/shader/softmax_reduce_sum_pack4.comp b/src/layer/vulkan/shader/softmax_reduce_sum_pack4.comp index 92e318da2..ed24e9270 100644 --- a/src/layer/vulkan/shader/softmax_reduce_sum_pack4.comp +++ b/src/layer/vulkan/shader/softmax_reduce_sum_pack4.comp @@ -37,11 +37,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_top_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_top_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_top_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D sum_workspace_1d; -layout (binding = 
1, imfmtc4) writeonly uniform unfp image2D sum_workspace_2d; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D sum_workspace_3d; #else layout (binding = 0) readonly buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; layout (binding = 1) writeonly buffer sum_workspace { sfpvec4 sum_workspace_data[]; }; @@ -78,7 +75,7 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afpvec4 v = image1d_ld4(bottom_top_blob_1d, i); + afpvec4 v = image3d_ld4(bottom_top_blob_3d, ivec3(i, 0, 0)); #else afpvec4 v = buffer_ld4(bottom_top_blob_data, i); #endif @@ -87,7 +84,7 @@ void main() afpvec2 sum2 = sum_value.rg + sum_value.ba; sum_value = afpvec4(sum2.r + sum2.g); #if NCNN_image_shader - image1d_st4(sum_workspace_1d, 0, sum_value); + image3d_st4(sum_workspace_3d, ivec3(0, 0, 0), sum_value); #else buffer_st4(sum_workspace_data, 0, sum_value); #endif @@ -99,7 +96,7 @@ void main() for (int i = 0; i < psc(h); i++) { #if NCNN_image_shader - afpvec4 v = image2d_ld4(bottom_top_blob_2d, ivec2(gx, i)); + afpvec4 v = image3d_ld4(bottom_top_blob_3d, ivec3(gx, i, 0)); #else int v_offset = i * psc(w) + gx; afpvec4 v = buffer_ld4(bottom_top_blob_data, v_offset); @@ -109,7 +106,7 @@ void main() afpvec2 sum2 = sum_value.rg + sum_value.ba; sum_value = afpvec4(sum2.r + sum2.g); #if NCNN_image_shader - image1d_st4(sum_workspace_1d, gx, sum_value); + image3d_st4(sum_workspace_3d, ivec3(gx, 0, 0), sum_value); #else buffer_st4(sum_workspace_data, gx, sum_value); #endif @@ -121,7 +118,7 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afpvec4 v = image2d_ld4(bottom_top_blob_2d, ivec2(i, gx)); + afpvec4 v = image3d_ld4(bottom_top_blob_3d, ivec3(i, gx, 0)); #else int v_offset = gx * psc(w) + i; afpvec4 v = buffer_ld4(bottom_top_blob_data, v_offset); @@ -129,7 +126,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image1d_st4(sum_workspace_1d, gx, sum_value); + image3d_st4(sum_workspace_3d, ivec3(gx, 0, 0), sum_value); #else 
buffer_st4(sum_workspace_data, gx, sum_value); #endif @@ -151,7 +148,7 @@ void main() afpvec2 sum2 = sum_value.rg + sum_value.ba; sum_value = afpvec4(sum2.r + sum2.g); #if NCNN_image_shader - image2d_st4(sum_workspace_2d, ivec2(gx, gy), sum_value); + image3d_st4(sum_workspace_3d, ivec3(gx, gy, 0), sum_value); #else buffer_st4(sum_workspace_data, gy * psc(w) + gx, sum_value); #endif @@ -171,7 +168,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image2d_st4(sum_workspace_2d, ivec2(gx, gy), sum_value); + image3d_st4(sum_workspace_3d, ivec3(gx, gy, 0), sum_value); #else buffer_st4(sum_workspace_data, gy * psc(w) + gx, sum_value); #endif @@ -191,7 +188,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image2d_st4(sum_workspace_2d, ivec2(gx, gy), sum_value); + image3d_st4(sum_workspace_3d, ivec3(gx, gy, 0), sum_value); #else buffer_st4(sum_workspace_data, gy * psc(h) + gx, sum_value); #endif diff --git a/src/layer/vulkan/shader/softmax_reduce_sum_pack8.comp b/src/layer/vulkan/shader/softmax_reduce_sum_pack8.comp index 41f9418ac..024d0f276 100644 --- a/src/layer/vulkan/shader/softmax_reduce_sum_pack8.comp +++ b/src/layer/vulkan/shader/softmax_reduce_sum_pack8.comp @@ -38,11 +38,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_top_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_top_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_top_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D sum_workspace_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D sum_workspace_2d; +layout (binding = 1, imfmtc4) writeonly uniform unfp image3D sum_workspace_3d; #else layout (binding = 0) readonly buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; layout (binding = 1) writeonly buffer sum_workspace { sfpvec8 sum_workspace_data[]; 
}; @@ -79,7 +76,7 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afpvec8 v = image1d_ld8(bottom_top_blob_1d, i); + afpvec8 v = image3d_ld8(bottom_top_blob_3d, ivec3(i, 0, 0)); #else afpvec8 v = buffer_ld8(bottom_top_blob_data, i); #endif @@ -90,7 +87,7 @@ void main() afp sum1 = sum2.r + sum2.g; sum_value = afpvec8(afpvec4(sum1), afpvec4(sum1)); #if NCNN_image_shader - image1d_st8(sum_workspace_1d, 0, sum_value); + image3d_st8(sum_workspace_3d, ivec3(0, 0, 0), sum_value); #else buffer_st8(sum_workspace_data, 0, sum_value); #endif @@ -102,7 +99,7 @@ void main() for (int i = 0; i < psc(h); i++) { #if NCNN_image_shader - afpvec8 v = image2d_ld8(bottom_top_blob_2d, ivec2(gx, i)); + afpvec8 v = image3d_ld8(bottom_top_blob_3d, ivec3(gx, i, 0)); #else int v_offset = i * psc(w) + gx; afpvec8 v = buffer_ld8(bottom_top_blob_data, v_offset); @@ -114,7 +111,7 @@ void main() afp sum1 = sum2.r + sum2.g; sum_value = afpvec8(afpvec4(sum1), afpvec4(sum1)); #if NCNN_image_shader - image1d_st8(sum_workspace_1d, gx, sum_value); + image3d_st8(sum_workspace_3d, ivec3(gx, 0, 0), sum_value); #else buffer_st8(sum_workspace_data, gx, sum_value); #endif @@ -126,7 +123,7 @@ void main() for (int i = 0; i < psc(w); i++) { #if NCNN_image_shader - afpvec8 v = image2d_ld8(bottom_top_blob_2d, ivec2(i, gx)); + afpvec8 v = image3d_ld8(bottom_top_blob_3d, ivec3(i, gx, 0)); #else int v_offset = gx * psc(w) + i; afpvec8 v = buffer_ld8(bottom_top_blob_data, v_offset); @@ -134,7 +131,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image1d_st8(sum_workspace_1d, gx, sum_value); + image3d_st8(sum_workspace_3d, ivec3(gx, 0, 0), sum_value); #else buffer_st8(sum_workspace_data, gx, sum_value); #endif @@ -158,7 +155,7 @@ void main() afp sum1 = sum2.r + sum2.g; sum_value = afpvec8(afpvec4(sum1), afpvec4(sum1)); #if NCNN_image_shader - image2d_st8(sum_workspace_2d, ivec2(gx, gy), sum_value); + image3d_st8(sum_workspace_3d, ivec3(gx, gy, 0), sum_value); #else 
buffer_st8(sum_workspace_data, gy * psc(w) + gx, sum_value); #endif @@ -178,7 +175,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image2d_st8(sum_workspace_2d, ivec2(gx, gy), sum_value); + image3d_st8(sum_workspace_3d, ivec3(gx, gy, 0), sum_value); #else buffer_st8(sum_workspace_data, gy * psc(w) + gx, sum_value); #endif @@ -198,7 +195,7 @@ void main() sum_value += v; } #if NCNN_image_shader - image2d_st8(sum_workspace_2d, ivec2(gx, gy), sum_value); + image3d_st8(sum_workspace_3d, ivec3(gx, gy, 0), sum_value); #else buffer_st8(sum_workspace_data, gy * psc(h) + gx, sum_value); #endif diff --git a/src/layer/vulkan/shader/tanh.comp b/src/layer/vulkan/shader/tanh.comp index 80695851e..46f1e5543 100644 --- a/src/layer/vulkan/shader/tanh.comp +++ b/src/layer/vulkan/shader/tanh.comp @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; @@ -58,19 +54,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -80,18 +64,7 @@ void main() v = tanh(v); #if NCNN_image_shader - if (psc(dims) == 1) - { - 
image1d_st1(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st1(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/tanh_pack4.comp b/src/layer/vulkan/shader/tanh_pack4.comp index 436c49b33..732d70016 100644 --- a/src/layer/vulkan/shader/tanh_pack4.comp +++ b/src/layer/vulkan/shader/tanh_pack4.comp @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; @@ -58,19 +54,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -80,18 +64,7 @@ void main() v = tanh(v); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); 
#else buffer_st4(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/tanh_pack8.comp b/src/layer/vulkan/shader/tanh_pack8.comp index 9a3a544db..382c6a1ff 100644 --- a/src/layer/vulkan/shader/tanh_pack8.comp +++ b/src/layer/vulkan/shader/tanh_pack8.comp @@ -30,11 +30,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; @@ -59,19 +55,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -82,18 +66,7 @@ void main() v[1] = tanh(v[1]); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, v); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), v); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); #else buffer_st8(bottom_top_blob_data, gi, v); #endif diff --git a/src/layer/vulkan/shader/unaryop.comp b/src/layer/vulkan/shader/unaryop.comp index cb7634575..44e0c544a 100644 --- a/src/layer/vulkan/shader/unaryop.comp +++ 
b/src/layer/vulkan/shader/unaryop.comp @@ -31,11 +31,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; @@ -60,19 +56,7 @@ void main() return; #if NCNN_image_shader - afp v; - if (psc(dims) == 1) - { - v = image1d_ld1(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -100,18 +84,7 @@ void main() if (op_type == 16) res = tanh(v); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st1(top_blob_1d, gx, res); - } - else if (psc(dims) == 2) - { - image2d_st1(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(dims) == 3) - { - image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st1(bottom_top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/unaryop_pack4.comp b/src/layer/vulkan/shader/unaryop_pack4.comp index 912e1bf27..1d6e71c22 100644 --- a/src/layer/vulkan/shader/unaryop_pack4.comp +++ b/src/layer/vulkan/shader/unaryop_pack4.comp @@ -31,11 +31,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader 
-layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; @@ -60,19 +56,7 @@ void main() return; #if NCNN_image_shader - afpvec4 v; - if (psc(dims) == 1) - { - v = image1d_ld4(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -100,18 +84,7 @@ void main() if (op_type == 16) res = tanh(v); #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st4(top_blob_1d, gx, res); - } - else if (psc(dims) == 2) - { - image2d_st4(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(dims) == 3) - { - image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st4(bottom_top_blob_data, gi, res); #endif diff --git a/src/layer/vulkan/shader/unaryop_pack8.comp b/src/layer/vulkan/shader/unaryop_pack8.comp index 3d8c127cd..d888ea2b2 100644 --- a/src/layer/vulkan/shader/unaryop_pack8.comp +++ b/src/layer/vulkan/shader/unaryop_pack8.comp @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; #if NCNN_image_shader -layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; -layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; -layout (binding = 1, imfmtc4) 
writeonly uniform unfp image1D top_blob_1d; -layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; #else layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; @@ -61,19 +57,7 @@ void main() return; #if NCNN_image_shader - afpvec8 v; - if (psc(dims) == 1) - { - v = image1d_ld8(bottom_blob_1d, gx); - } - else if (psc(dims) == 2) - { - v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); - } - else // if (psc(dims) == 3) - { - v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); - } + afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); #else const int gi = gz * psc(cstep) + gy * psc(w) + gx; @@ -169,18 +153,7 @@ void main() } #if NCNN_image_shader - if (psc(dims) == 1) - { - image1d_st8(top_blob_1d, gx, res); - } - else if (psc(dims) == 2) - { - image2d_st8(top_blob_2d, ivec2(gx, gy), res); - } - else // if (psc(dims) == 3) - { - image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); - } + image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); #else buffer_st8(bottom_top_blob_data, gi, res); #endif diff --git a/src/mat.h b/src/mat.h index f2f07177a..e9f14cc8f 100644 --- a/src/mat.h +++ b/src/mat.h @@ -1987,7 +1987,7 @@ inline void VkImageMat::create(int _w, size_t _elemsize, VkAllocator* _allocator if (total() > 0) { - data = allocator->fastMalloc(dims, w, h, c, elemsize, elempack); + data = allocator->fastMalloc(w, h, c, elemsize, elempack); if (!data) return; @@ -2014,7 +2014,7 @@ inline void VkImageMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _a if (total() > 0) { - data = allocator->fastMalloc(dims, w, h, c, elemsize, elempack); + data = allocator->fastMalloc(w, h, c, elemsize, elempack); if (!data) return; @@ -2041,7 +2041,7 @@ inline void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, VkAlloc if (total() > 0) { - data = allocator->fastMalloc(dims, w, h, c, elemsize, elempack); + data = allocator->fastMalloc(w, h, c, 
elemsize, elempack); if (!data) return; @@ -2068,7 +2068,7 @@ inline void VkImageMat::create(int _w, size_t _elemsize, int _elempack, VkAlloca if (total() > 0) { - data = allocator->fastMalloc(dims, w, h, c, elemsize, elempack); + data = allocator->fastMalloc(w, h, c, elemsize, elempack); if (!data) return; @@ -2095,7 +2095,7 @@ inline void VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, if (total() > 0) { - data = allocator->fastMalloc(dims, w, h, c, elemsize, elempack); + data = allocator->fastMalloc(w, h, c, elemsize, elempack); if (!data) return; @@ -2122,7 +2122,7 @@ inline void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _el if (total() > 0) { - data = allocator->fastMalloc(dims, w, h, c, elemsize, elempack); + data = allocator->fastMalloc(w, h, c, elemsize, elempack); if (!data) return; diff --git a/src/net.cpp b/src/net.cpp index 18ec31d0a..1851e5e14 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -165,8 +165,6 @@ int Net::load_param(const DataReader& dr) // TODO give user a choice if (vkdev->info.bug_storage_buffer_no_l1) opt.use_image_storage = true; - if (vkdev->info.bug_layout_binding_id_alias) opt.use_image_storage = false; - // fp16a makes no sense when fp16 storage disabled if (!opt.use_fp16_packed && !opt.use_fp16_storage) opt.use_fp16_arithmetic = false; } @@ -381,8 +379,6 @@ int Net::load_param_bin(const DataReader& dr) // TODO give user a choice if (vkdev->info.bug_storage_buffer_no_l1) opt.use_image_storage = true; - if (vkdev->info.bug_layout_binding_id_alias) opt.use_image_storage = false; - // fp16a makes no sense when fp16 storage disabled if (!opt.use_fp16_packed && !opt.use_fp16_storage) opt.use_fp16_arithmetic = false; } @@ -579,8 +575,6 @@ int Net::load_model(const DataReader& dr) pipeline_cache = new PipelineCache(vkdev); opt.pipeline_cache = pipeline_cache; } - - if (vkdev->info.bug_layout_binding_id_alias) opt.use_image_storage = false; } #endif // NCNN_VULKAN diff --git a/tests/test_cast.cpp 
b/tests/test_cast.cpp index f5e613dab..45a70aa4b 100644 --- a/tests/test_cast.cpp +++ b/tests/test_cast.cpp @@ -403,9 +403,6 @@ static int test_cast_gpu_image_fp16p(const ncnn::Mat& a, int type_from, int type ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(); - if (vkdev->info.bug_layout_binding_id_alias) - return 0; - ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator(); ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator(); @@ -522,9 +519,6 @@ static int test_cast_gpu_image_fp16p_pack8(const ncnn::Mat& a, int type_from, in ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(); - if (vkdev->info.bug_layout_binding_id_alias) - return 0; - ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator(); ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator(); diff --git a/tests/test_packing.cpp b/tests/test_packing.cpp index 93c2b244c..281b824d0 100644 --- a/tests/test_packing.cpp +++ b/tests/test_packing.cpp @@ -244,9 +244,6 @@ static int test_packing_gpu_image(const ncnn::Mat& a, int in_elempack, int out_e ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(); - if (vkdev->info.bug_layout_binding_id_alias) - return 0; - ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator(); ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator(); @@ -334,9 +331,6 @@ static int test_packing_gpu_buffer2image(const ncnn::Mat& a, int in_elempack, in ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(); - if (vkdev->info.bug_layout_binding_id_alias) - return 0; - ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator(); ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator(); @@ -424,9 +418,6 @@ static int test_packing_gpu_image2buffer(const ncnn::Mat& a, int in_elempack, in ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device(); - if (vkdev->info.bug_layout_binding_id_alias) - return 0; - ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator(); 
ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();