Browse Source

unified image shader storage type (#2231)

* drop bug_layout_binding_id_alias flag
tags/20201208
nihui GitHub 5 years ago
parent
commit
cf3cf83cd3
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
100 changed files with 561 additions and 1774 deletions
  1. +22
    -106
      src/allocator.cpp
  2. +8
    -10
      src/allocator.h
  3. +2
    -51
      src/gpu.cpp
  4. +0
    -1
      src/gpu.h
  5. +2
    -29
      src/layer/vulkan/shader/absval.comp
  6. +2
    -29
      src/layer/vulkan/shader/absval_pack4.comp
  7. +2
    -29
      src/layer/vulkan/shader/absval_pack8.comp
  8. +6
    -33
      src/layer/vulkan/shader/batchnorm.comp
  9. +6
    -33
      src/layer/vulkan/shader/batchnorm_pack4.comp
  10. +6
    -33
      src/layer/vulkan/shader/batchnorm_pack8.comp
  11. +4
    -58
      src/layer/vulkan/shader/binaryop.comp
  12. +58
    -75
      src/layer/vulkan/shader/binaryop_broadcast.comp
  13. +3
    -31
      src/layer/vulkan/shader/binaryop_broadcast_a1_pack4.comp
  14. +3
    -31
      src/layer/vulkan/shader/binaryop_broadcast_a1_pack8.comp
  15. +3
    -31
      src/layer/vulkan/shader/binaryop_broadcast_b1_pack4.comp
  16. +3
    -31
      src/layer/vulkan/shader/binaryop_broadcast_b1_pack8.comp
  17. +58
    -75
      src/layer/vulkan/shader/binaryop_broadcast_pack4.comp
  18. +58
    -75
      src/layer/vulkan/shader/binaryop_broadcast_pack8.comp
  19. +4
    -58
      src/layer/vulkan/shader/binaryop_pack4.comp
  20. +4
    -58
      src/layer/vulkan/shader/binaryop_pack8.comp
  21. +1
    -16
      src/layer/vulkan/shader/cast_fp16_to_fp32.comp
  22. +1
    -16
      src/layer/vulkan/shader/cast_fp16_to_fp32_pack4.comp
  23. +1
    -16
      src/layer/vulkan/shader/cast_fp16_to_fp32_pack8.comp
  24. +1
    -16
      src/layer/vulkan/shader/cast_fp32_to_fp16.comp
  25. +1
    -16
      src/layer/vulkan/shader/cast_fp32_to_fp16_pack4.comp
  26. +1
    -16
      src/layer/vulkan/shader/cast_fp32_to_fp16_pack8.comp
  27. +2
    -29
      src/layer/vulkan/shader/clip.comp
  28. +2
    -29
      src/layer/vulkan/shader/clip_pack4.comp
  29. +2
    -29
      src/layer/vulkan/shader/clip_pack8.comp
  30. +5
    -23
      src/layer/vulkan/shader/concat.comp
  31. +5
    -23
      src/layer/vulkan/shader/concat_pack4.comp
  32. +31
    -51
      src/layer/vulkan/shader/concat_pack4to1.comp
  33. +5
    -23
      src/layer/vulkan/shader/concat_pack8.comp
  34. +55
    -75
      src/layer/vulkan/shader/concat_pack8to1.comp
  35. +19
    -39
      src/layer/vulkan/shader/concat_pack8to4.comp
  36. +2
    -2
      src/layer/vulkan/shader/convolution.comp
  37. +2
    -2
      src/layer/vulkan/shader/convolution_1x1s1d1.comp
  38. +2
    -2
      src/layer/vulkan/shader/convolution_pack1to4.comp
  39. +2
    -2
      src/layer/vulkan/shader/convolution_pack1to8.comp
  40. +2
    -2
      src/layer/vulkan/shader/convolution_pack4.comp
  41. +2
    -2
      src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp
  42. +2
    -2
      src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp
  43. +2
    -2
      src/layer/vulkan/shader/convolution_pack4to1.comp
  44. +2
    -2
      src/layer/vulkan/shader/convolution_pack4to8.comp
  45. +2
    -2
      src/layer/vulkan/shader/convolution_pack8.comp
  46. +2
    -2
      src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp
  47. +2
    -2
      src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp
  48. +2
    -2
      src/layer/vulkan/shader/convolution_pack8to1.comp
  49. +2
    -2
      src/layer/vulkan/shader/convolution_pack8to4.comp
  50. +4
    -4
      src/layer/vulkan/shader/convolutiondepthwise.comp
  51. +2
    -2
      src/layer/vulkan/shader/convolutiondepthwise_group.comp
  52. +2
    -2
      src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp
  53. +2
    -2
      src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp
  54. +2
    -2
      src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp
  55. +2
    -2
      src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp
  56. +2
    -2
      src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp
  57. +2
    -2
      src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp
  58. +2
    -2
      src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp
  59. +2
    -2
      src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp
  60. +4
    -4
      src/layer/vulkan/shader/convolutiondepthwise_pack4.comp
  61. +4
    -4
      src/layer/vulkan/shader/convolutiondepthwise_pack8.comp
  62. +2
    -2
      src/layer/vulkan/shader/deconvolution.comp
  63. +2
    -2
      src/layer/vulkan/shader/deconvolution_pack1to4.comp
  64. +2
    -2
      src/layer/vulkan/shader/deconvolution_pack1to8.comp
  65. +2
    -2
      src/layer/vulkan/shader/deconvolution_pack4.comp
  66. +2
    -2
      src/layer/vulkan/shader/deconvolution_pack4to1.comp
  67. +2
    -2
      src/layer/vulkan/shader/deconvolution_pack4to8.comp
  68. +2
    -2
      src/layer/vulkan/shader/deconvolution_pack8.comp
  69. +2
    -2
      src/layer/vulkan/shader/deconvolution_pack8to1.comp
  70. +2
    -2
      src/layer/vulkan/shader/deconvolution_pack8to4.comp
  71. +4
    -4
      src/layer/vulkan/shader/deconvolutiondepthwise.comp
  72. +2
    -2
      src/layer/vulkan/shader/deconvolutiondepthwise_group.comp
  73. +2
    -2
      src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to4.comp
  74. +2
    -2
      src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to8.comp
  75. +2
    -2
      src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4.comp
  76. +2
    -2
      src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to1.comp
  77. +2
    -2
      src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to8.comp
  78. +2
    -2
      src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8.comp
  79. +2
    -2
      src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to1.comp
  80. +2
    -2
      src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to4.comp
  81. +4
    -4
      src/layer/vulkan/shader/deconvolutiondepthwise_pack4.comp
  82. +4
    -4
      src/layer/vulkan/shader/deconvolutiondepthwise_pack8.comp
  83. +1
    -16
      src/layer/vulkan/shader/deepcopy.comp
  84. +1
    -16
      src/layer/vulkan/shader/deepcopy_pack4.comp
  85. +1
    -16
      src/layer/vulkan/shader/deepcopy_pack8.comp
  86. +2
    -29
      src/layer/vulkan/shader/dropout.comp
  87. +2
    -29
      src/layer/vulkan/shader/dropout_pack4.comp
  88. +2
    -29
      src/layer/vulkan/shader/dropout_pack8.comp
  89. +3
    -35
      src/layer/vulkan/shader/eltwise.comp
  90. +3
    -35
      src/layer/vulkan/shader/eltwise_pack4.comp
  91. +3
    -35
      src/layer/vulkan/shader/eltwise_pack8.comp
  92. +3
    -4
      src/layer/vulkan/shader/flatten.comp
  93. +6
    -7
      src/layer/vulkan/shader/flatten_pack1to4.comp
  94. +10
    -11
      src/layer/vulkan/shader/flatten_pack1to8.comp
  95. +6
    -7
      src/layer/vulkan/shader/flatten_pack4.comp
  96. +10
    -11
      src/layer/vulkan/shader/flatten_pack4to8.comp
  97. +10
    -11
      src/layer/vulkan/shader/flatten_pack8.comp
  98. +2
    -25
      src/layer/vulkan/shader/hardsigmoid.comp
  99. +2
    -29
      src/layer/vulkan/shader/hardsigmoid_pack4.comp
  100. +2
    -29
      src/layer/vulkan/shader/hardsigmoid_pack8.comp

+ 22
- 106
src/allocator.cpp View File

@@ -375,13 +375,13 @@ VkDeviceMemory VkAllocator::allocate_dedicated_memory(size_t size, uint32_t memo
return memory;
}

VkImage VkAllocator::create_image(VkImageType type, int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage)
VkImage VkAllocator::create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage)
{
VkImageCreateInfo imageCreateInfo;
imageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
imageCreateInfo.pNext = 0;
imageCreateInfo.flags = 0;
imageCreateInfo.imageType = type;
imageCreateInfo.imageType = VK_IMAGE_TYPE_3D;
imageCreateInfo.format = format;
imageCreateInfo.extent.width = width;
imageCreateInfo.extent.height = height;
@@ -400,21 +400,21 @@ VkImage VkAllocator::create_image(VkImageType type, int width, int height, int d
VkResult ret = vkCreateImage(vkdev->vkdevice(), &imageCreateInfo, 0, &image);
if (ret != VK_SUCCESS)
{
NCNN_LOGE("vkCreateImage failed %d %d %d %d %d %d %d %d", ret, type, width, height, depth, format, tiling, usage);
NCNN_LOGE("vkCreateImage failed %d %d %d %d %d %d %d", ret, width, height, depth, format, tiling, usage);
return 0;
}

return image;
}

VkImageView VkAllocator::create_imageview(VkImageViewType type, VkImage image, VkFormat format)
VkImageView VkAllocator::create_imageview(VkImage image, VkFormat format)
{
VkImageViewCreateInfo imageViewCreateInfo;
imageViewCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
imageViewCreateInfo.pNext = 0;
imageViewCreateInfo.flags = 0;
imageViewCreateInfo.image = image;
imageViewCreateInfo.viewType = type;
imageViewCreateInfo.viewType = VK_IMAGE_VIEW_TYPE_3D;
imageViewCreateInfo.format = format;
imageViewCreateInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
imageViewCreateInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
@@ -709,7 +709,7 @@ void VkBlobAllocator::fastFree(VkBufferMemory* ptr)
delete ptr;
}

VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack)
VkImageMemory* VkBlobAllocator::fastMalloc(int w, int h, int c, size_t elemsize, int elempack)
{
if (elempack != 1 && elempack != 4 && elempack != 8)
{
@@ -743,48 +743,16 @@ VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t
// large elempack spills on image w
if (elempack == 8) width *= 2;

VkImageType image_type;
VkImageViewType imageview_type;
if (dims == 1)
if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d)
{
image_type = VK_IMAGE_TYPE_1D;
imageview_type = VK_IMAGE_VIEW_TYPE_1D;

if (width > (int)vkdev->info.max_image_dimension_1d)
{
NCNN_LOGE("image dimension too large %d > %d", width, (int)vkdev->info.max_image_dimension_1d);
return 0;
}
}
else if (dims == 2)
{
image_type = VK_IMAGE_TYPE_2D;
imageview_type = VK_IMAGE_VIEW_TYPE_2D;

if (width > (int)vkdev->info.max_image_dimension_2d || height > (int)vkdev->info.max_image_dimension_2d)
{
NCNN_LOGE("image dimension too large %d %d > %d", width, height, (int)vkdev->info.max_image_dimension_2d);
return 0;
}
}
else // if (dims == 3)
{
image_type = VK_IMAGE_TYPE_3D;
imageview_type = VK_IMAGE_VIEW_TYPE_3D;

if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d)
{
NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d);
return 0;
}
NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d);
return 0;
}

VkImageMemory* ptr = new VkImageMemory;

ptr->image = create_image(image_type, width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
ptr->image = create_image(width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);

ptr->image_type = image_type;
ptr->imageview_type = imageview_type;
ptr->width = width;
ptr->height = height;
ptr->depth = depth;
@@ -827,7 +795,7 @@ VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t
// do not allow host access to optimal tiling image
ptr->mapped_ptr = 0;

ptr->imageview = create_imageview(imageview_type, ptr->image, format);
ptr->imageview = create_imageview(ptr->image, format);

ptr->access_flags = 0;
ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -894,7 +862,7 @@ VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t
// do not allow host access to optimal tiling image
ptr->mapped_ptr = 0;

ptr->imageview = create_imageview(imageview_type, ptr->image, format);
ptr->imageview = create_imageview(ptr->image, format);

ptr->access_flags = 0;
ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -1244,7 +1212,7 @@ void VkWeightAllocator::fastFree(VkBufferMemory* ptr)
delete ptr;
}

VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack)
VkImageMemory* VkWeightAllocator::fastMalloc(int w, int h, int c, size_t elemsize, int elempack)
{
if (elempack != 1 && elempack != 4 && elempack != 8 && elempack != 16 && elempack != 32 && elempack != 64)
{
@@ -1287,48 +1255,16 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size
if (elempack == 32) width *= 8;
if (elempack == 64) width *= 16;

VkImageType image_type;
VkImageViewType imageview_type;
if (dims == 1)
if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d)
{
image_type = VK_IMAGE_TYPE_1D;
imageview_type = VK_IMAGE_VIEW_TYPE_1D;

if (width > (int)vkdev->info.max_image_dimension_1d)
{
NCNN_LOGE("image dimension too large %d > %d", width, (int)vkdev->info.max_image_dimension_1d);
return 0;
}
}
else if (dims == 2)
{
image_type = VK_IMAGE_TYPE_2D;
imageview_type = VK_IMAGE_VIEW_TYPE_2D;

if (width > (int)vkdev->info.max_image_dimension_2d || height > (int)vkdev->info.max_image_dimension_2d)
{
NCNN_LOGE("image dimension too large %d %d > %d", width, height, (int)vkdev->info.max_image_dimension_2d);
return 0;
}
}
else // if (dims == 3)
{
image_type = VK_IMAGE_TYPE_3D;
imageview_type = VK_IMAGE_VIEW_TYPE_3D;

if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d)
{
NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d);
return 0;
}
NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d);
return 0;
}

VkImageMemory* ptr = new VkImageMemory;

ptr->image = create_image(image_type, width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
ptr->image = create_image(width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);

ptr->image_type = image_type;
ptr->imageview_type = imageview_type;
ptr->width = width;
ptr->height = height;
ptr->depth = depth;
@@ -1385,7 +1321,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size
// do not allow host access to optimal tiling image
ptr->mapped_ptr = 0;

ptr->imageview = create_imageview(imageview_type, ptr->image, format);
ptr->imageview = create_imageview(ptr->image, format);

ptr->access_flags = 0;
ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -1426,7 +1362,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size
// do not allow host access to optimal tiling image
ptr->mapped_ptr = 0;

ptr->imageview = create_imageview(imageview_type, ptr->image, format);
ptr->imageview = create_imageview(ptr->image, format);

ptr->access_flags = 0;
ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -1482,7 +1418,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size
// do not allow host access to optimal tiling image
ptr->mapped_ptr = 0;

ptr->imageview = create_imageview(imageview_type, ptr->image, format);
ptr->imageview = create_imageview(ptr->image, format);

ptr->access_flags = 0;
ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED;
@@ -1612,36 +1548,16 @@ void VkStagingAllocator::fastFree(VkBufferMemory* ptr)
buffer_budgets.push_back(ptr);
}

VkImageMemory* VkStagingAllocator::fastMalloc(int dims, int w, int h, int c, size_t elemsize, int /* elempack */)
VkImageMemory* VkStagingAllocator::fastMalloc(int w, int h, int c, size_t elemsize, int /* elempack */)
{
// staging image is mainly used for storing small pieces of dynamic parameters
// we allocate host memory as a fake image, it's simple and good

const size_t size = w * h * c * elemsize;

VkImageType image_type;
VkImageViewType imageview_type;
if (dims == 1)
{
image_type = VK_IMAGE_TYPE_1D;
imageview_type = VK_IMAGE_VIEW_TYPE_1D;
}
else if (dims == 2)
{
image_type = VK_IMAGE_TYPE_2D;
imageview_type = VK_IMAGE_VIEW_TYPE_2D;
}
else // if (dims == 3)
{
image_type = VK_IMAGE_TYPE_3D;
imageview_type = VK_IMAGE_VIEW_TYPE_3D;
}

VkImageMemory* ptr = new VkImageMemory;

ptr->image = 0;
ptr->image_type = image_type;
ptr->imageview_type = imageview_type;
ptr->width = w;
ptr->height = h;
ptr->depth = c;
@@ -1746,7 +1662,7 @@ VkAndroidHardwareBufferImageAllocator::~VkAndroidHardwareBufferImageAllocator()
}
}

VkImageMemory* VkAndroidHardwareBufferImageAllocator::fastMalloc(int /*dims*/, int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/)
VkImageMemory* VkAndroidHardwareBufferImageAllocator::fastMalloc(int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/)
{
VkResult ret;



+ 8
- 10
src/allocator.h View File

@@ -224,8 +224,6 @@ public:
VkImageView imageview;

// underlying info assigned by allocator
VkImageType image_type;
VkImageViewType imageview_type;
int width;
int height;
int depth;
@@ -267,7 +265,7 @@ public:
virtual int flush(VkBufferMemory* ptr);
virtual int invalidate(VkBufferMemory* ptr);

virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack) = 0;
virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0;
virtual void fastFree(VkImageMemory* ptr) = 0;

public:
@@ -282,8 +280,8 @@ protected:
VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index);
VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer);

VkImage create_image(VkImageType type, int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage);
VkImageView create_imageview(VkImageViewType type, VkImage image, VkFormat format);
VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage);
VkImageView create_imageview(VkImage image, VkFormat format);
};

class VkBlobAllocator : public VkAllocator
@@ -298,7 +296,7 @@ public:

virtual VkBufferMemory* fastMalloc(size_t size);
virtual void fastFree(VkBufferMemory* ptr);
virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack);
virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
virtual void fastFree(VkImageMemory* ptr);

protected:
@@ -324,7 +322,7 @@ public:
public:
virtual VkBufferMemory* fastMalloc(size_t size);
virtual void fastFree(VkBufferMemory* ptr);
virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack);
virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
virtual void fastFree(VkImageMemory* ptr);

protected:
@@ -355,7 +353,7 @@ public:

virtual VkBufferMemory* fastMalloc(size_t size);
virtual void fastFree(VkBufferMemory* ptr);
virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack);
virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
virtual void fastFree(VkImageMemory* ptr);

protected:
@@ -372,7 +370,7 @@ public:
public:
virtual VkBufferMemory* fastMalloc(size_t size);
virtual void fastFree(VkBufferMemory* ptr);
virtual VkImageMemory* fastMalloc(int /*dims*/, int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/)
virtual VkImageMemory* fastMalloc(int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/)
{
return 0;
}
@@ -392,7 +390,7 @@ public:
virtual ~VkAndroidHardwareBufferImageAllocator();

public:
virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack);
virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack);
virtual void fastFree(VkImageMemory* ptr);
virtual VkBufferMemory* fastMalloc(size_t /*size*/)
{


+ 2
- 51
src/gpu.cpp View File

@@ -641,16 +641,9 @@ int create_gpu_instance()
// 650 = 0x5143 0x6050002

gpu_info.bug_storage_buffer_no_l1 = false;
gpu_info.bug_layout_binding_id_alias = false;
gpu_info.bug_corrupted_online_pipeline_cache = false;
gpu_info.bug_implicit_fp16_arithmetic = false;

if (physicalDeviceProperties.vendorID == 0x5143 && physicalDeviceProperties.apiVersion < VK_MAKE_VERSION(1, 0, 49))
{
// qcom adreno with old buggy driver cannot handle binding id alias
gpu_info.bug_layout_binding_id_alias = true;
}

if (physicalDeviceProperties.vendorID == 0x5143 && physicalDeviceProperties.apiVersion < VK_MAKE_VERSION(1, 0, 66))
{
// qcom adreno with old buggy driver cannot share created pipeline properly
@@ -668,22 +661,6 @@ int create_gpu_instance()
// gpu_info.bug_storage_buffer_no_l1 = true;
}

if (physicalDeviceProperties.vendorID == 0x13b5
&& (physicalDeviceProperties.deviceID == 0x7500001
|| physicalDeviceProperties.deviceID == 0x8602000
|| physicalDeviceProperties.deviceID == 0x8800020))
{
// these arm mali midgard era driver cannot handle binding id alias
gpu_info.bug_layout_binding_id_alias = true;
}

#if __APPLE__
{
// metal shader never accept binding id alias
gpu_info.bug_layout_binding_id_alias = true;
}
#endif

if (physicalDeviceProperties.vendorID == 0x13b5
&& (physicalDeviceProperties.deviceID == 0x7500001
|| physicalDeviceProperties.deviceID == 0x8602000
@@ -997,8 +974,8 @@ int create_gpu_instance()
gpu_info.graphics_queue_family_index, gpu_info.graphics_queue_count,
gpu_info.transfer_queue_family_index, gpu_info.transfer_queue_count);

NCNN_LOGE("[%u %s] bugsbn1=%d buglbia=%d bugcopc=%d bugihfa=%d", i, physicalDeviceProperties.deviceName,
gpu_info.bug_storage_buffer_no_l1, gpu_info.bug_layout_binding_id_alias, gpu_info.bug_corrupted_online_pipeline_cache, gpu_info.bug_implicit_fp16_arithmetic);
NCNN_LOGE("[%u %s] bugsbn1=%d bugcopc=%d bugihfa=%d", i, physicalDeviceProperties.deviceName,
gpu_info.bug_storage_buffer_no_l1, gpu_info.bug_corrupted_online_pipeline_cache, gpu_info.bug_implicit_fp16_arithmetic);

NCNN_LOGE("[%u %s] fp16p=%d fp16s=%d fp16a=%d int8s=%d int8a=%d", i, physicalDeviceProperties.deviceName,
gpu_info.support_fp16_packed, gpu_info.support_fp16_storage, gpu_info.support_fp16_arithmetic,
@@ -2031,12 +2008,6 @@ void VulkanDevice::convert_packing(const VkMat& src, VkMat& dst, int dst_elempac

void VulkanDevice::convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const
{
if (info.bug_layout_binding_id_alias)
{
NCNN_LOGE("cannot convert_packing i2i");
return;
}

int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0;
int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2;

@@ -2069,12 +2040,6 @@ void VulkanDevice::convert_packing(const VkImageMat& src, VkImageMat& dst, int d

void VulkanDevice::convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const
{
if (info.bug_layout_binding_id_alias)
{
NCNN_LOGE("cannot convert_packing b2i");
return;
}

int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0;
int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2;

@@ -2107,12 +2072,6 @@ void VulkanDevice::convert_packing(const VkMat& src, VkImageMat& dst, int dst_el

void VulkanDevice::convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const
{
if (info.bug_layout_binding_id_alias)
{
NCNN_LOGE("cannot convert_packing i2b");
return;
}

int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0;
int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2;

@@ -2359,12 +2318,6 @@ const ncnn::Packing_vulkan* VulkanDevice::get_utility_operator(int storage_type_
opt.use_fp16_packed = (cast_type_from_index == 1 || cast_type_to_index == 1);
opt.use_fp16_storage = (cast_type_from_index == 2 || cast_type_to_index == 2);

if (info.bug_layout_binding_id_alias && opt.use_image_storage)
{
NCNN_LOGE("cannot create uop with use_image_storage if bug_layout_binding_id_alias");
return 0;
}

if (!info.support_fp16_packed && opt.use_fp16_packed)
{
NCNN_LOGE("cannot create uop with use_fp16_packed if not support_fp16_packed");
@@ -2424,8 +2377,6 @@ void VulkanDevice::destroy_utility_operator()
for (int i1 = 0; i1 < 2; i1++)
{
opt.use_image_storage = (i0 == 1 || i1 == 1);
if (info.bug_layout_binding_id_alias && opt.use_image_storage)
continue;

// from fp32-b/i | fp16p-b/i | fp16s-b/i
// to fp32-b/i | fp16p-b/i | fp16s-b/i


+ 0
- 1
src/gpu.h View File

@@ -129,7 +129,6 @@ public:

// a bug is not a feature
bool bug_storage_buffer_no_l1;
bool bug_layout_binding_id_alias;
bool bug_corrupted_online_pipeline_cache;

// but sometimes bug is a feature


+ 2
- 29
src/layer/vulkan/shader/absval.comp View File

@@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
@@ -58,19 +54,7 @@ void main()
return;

#if NCNN_image_shader
afp v;
if (psc(dims) == 1)
{
v = image1d_ld1(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
}
afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -80,18 +64,7 @@ void main()
v = abs(v);

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st1(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st1(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st1(bottom_top_blob_data, gi, v);
#endif


+ 2
- 29
src/layer/vulkan/shader/absval_pack4.comp View File

@@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };
@@ -58,19 +54,7 @@ void main()
return;

#if NCNN_image_shader
afpvec4 v;
if (psc(dims) == 1)
{
v = image1d_ld4(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
}
afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -80,18 +64,7 @@ void main()
v = abs(v);

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st4(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st4(bottom_top_blob_data, gi, v);
#endif


+ 2
- 29
src/layer/vulkan/shader/absval_pack8.comp View File

@@ -30,11 +30,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };
@@ -59,19 +55,7 @@ void main()
return;

#if NCNN_image_shader
afpvec8 v;
if (psc(dims) == 1)
{
v = image1d_ld8(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));
}
afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -82,18 +66,7 @@ void main()
v[1] = abs(v[1]);

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st8(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st8(bottom_top_blob_data, gi, v);
#endif


+ 6
- 33
src/layer/vulkan/shader/batchnorm.comp View File

@@ -29,14 +29,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
layout (binding = 2) uniform unfp sampler1D a;
layout (binding = 3) uniform unfp sampler1D b;
layout (binding = 2) uniform unfp sampler3D a;
layout (binding = 3) uniform unfp sampler3D b;
#else
layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
layout (binding = 1) readonly buffer a { sfp a_data[]; };
@@ -62,19 +58,7 @@ void main()
return;

#if NCNN_image_shader
afp v;
if (psc(dims) == 1)
{
v = image1d_ld1(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
}
afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -85,8 +69,8 @@ void main()
const int abi = gxyz[psc(dims) - 1];

#if NCNN_image_shader
afp b = image1d_ld1(b, abi);
afp a = image1d_ld1(a, abi);
afp b = image3d_ld1(b, ivec3(abi, 0, 0));
afp a = image3d_ld1(a, ivec3(abi, 0, 0));
#else
afp b = buffer_ld1(b_data, abi);
afp a = buffer_ld1(a_data, abi);
@@ -95,18 +79,7 @@ void main()
v = b * v + a;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st1(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st1(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st1(bottom_top_blob_data, gi, v);
#endif


+ 6
- 33
src/layer/vulkan/shader/batchnorm_pack4.comp View File

@@ -29,14 +29,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
layout (binding = 2) uniform unfp sampler1D a;
layout (binding = 3) uniform unfp sampler1D b;
layout (binding = 2) uniform unfp sampler3D a;
layout (binding = 3) uniform unfp sampler3D b;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };
layout (binding = 1) readonly buffer a { sfpvec4 a_data[]; };
@@ -62,19 +58,7 @@ void main()
return;

#if NCNN_image_shader
afpvec4 v;
if (psc(dims) == 1)
{
v = image1d_ld4(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
}
afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -85,8 +69,8 @@ void main()
const int abi = gxyz[psc(dims) - 1];

#if NCNN_image_shader
afpvec4 b = image1d_ld4(b, abi);
afpvec4 a = image1d_ld4(a, abi);
afpvec4 b = image3d_ld4(b, ivec3(abi, 0, 0));
afpvec4 a = image3d_ld4(a, ivec3(abi, 0, 0));
#else
afpvec4 b = buffer_ld4(b_data, abi);
afpvec4 a = buffer_ld4(a_data, abi);
@@ -95,18 +79,7 @@ void main()
v = b * v + a;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st4(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st4(bottom_top_blob_data, gi, v);
#endif


+ 6
- 33
src/layer/vulkan/shader/batchnorm_pack8.comp View File

@@ -30,14 +30,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
layout (binding = 2) uniform unfp sampler1D a;
layout (binding = 3) uniform unfp sampler1D b;
layout (binding = 2) uniform unfp sampler3D a;
layout (binding = 3) uniform unfp sampler3D b;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };
layout (binding = 1) readonly buffer a { sfpvec8 a_data[]; };
@@ -63,19 +59,7 @@ void main()
return;

#if NCNN_image_shader
afpvec8 v;
if (psc(dims) == 1)
{
v = image1d_ld8(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));
}
afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -86,8 +70,8 @@ void main()
const int abi = gxyz[psc(dims) - 1];

#if NCNN_image_shader
afpvec8 b = image1d_ld8(b, abi);
afpvec8 a = image1d_ld8(a, abi);
afpvec8 b = image3d_ld8(b, ivec3(abi, 0, 0));
afpvec8 a = image3d_ld8(a, ivec3(abi, 0, 0));
#else
afpvec8 b = buffer_ld8(b_data, abi);
afpvec8 a = buffer_ld8(a_data, abi);
@@ -97,18 +81,7 @@ void main()
v[1] = b[1] * v[1] + a[1];

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st8(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st8(bottom_top_blob_data, gi, v);
#endif


+ 4
- 58
src/layer/vulkan/shader/binaryop.comp View File

@@ -45,14 +45,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D a_blob_1d;
layout (binding = 0) uniform unfp sampler2D a_blob_2d;
layout (binding = 0) uniform unfp sampler3D a_blob_3d;
layout (binding = 1) uniform unfp sampler1D b_blob_1d;
layout (binding = 1) uniform unfp sampler2D b_blob_2d;
layout (binding = 1) uniform unfp sampler3D b_blob_3d;
layout (binding = 2, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer a_blob { sfp a_blob_data[]; };
@@ -91,20 +85,7 @@ void main()
return;

#if NCNN_image_shader
afp v1;

if (psc(outdims) == 1)
{
v1 = image1d_ld1(a_blob_1d, gx);
}
else if (psc(outdims) == 2)
{
v1 = image2d_ld1(a_blob_2d, ivec2(gx, gy));
}
else // if (psc(outdims) == 3)
{
v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz));
}
afp v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

@@ -129,18 +110,7 @@ void main()
if (op_type == 8) res = b / v1;

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st1(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st1(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st1(a_blob_data, gi, res);
#endif
@@ -149,20 +119,7 @@ void main()
{
// type 7 13 19
#if NCNN_image_shader
afp v2;

if (psc(outdims) == 1)
{
v2 = image1d_ld1(b_blob_1d, gx);
}
else if (psc(outdims) == 2)
{
v2 = image2d_ld1(b_blob_2d, ivec2(gx, gy));
}
else // if (psc(outdims) == 3)
{
v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz));
}
afp v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz));
#else
afp v2 = buffer_ld1(b_blob_data, gi);
#endif
@@ -178,18 +135,7 @@ void main()
if (op_type == 8) res = v2 / v1;

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st1(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st1(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st1(top_blob_data, gi, res);
#endif


+ 58
- 75
src/layer/vulkan/shader/binaryop_broadcast.comp View File

@@ -43,14 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D a_blob_1d;
layout (binding = 0) uniform unfp sampler2D a_blob_2d;
layout (binding = 0) uniform unfp sampler3D a_blob_3d;
layout (binding = 1) uniform unfp sampler1D b_blob_1d;
layout (binding = 1) uniform unfp sampler2D b_blob_2d;
layout (binding = 1) uniform unfp sampler3D b_blob_3d;
layout (binding = 2, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer a_blob { sfp a_blob_data[]; };
@@ -89,8 +83,12 @@ void main()
return;

#if NCNN_image_shader
afp v1;
afp v2;
int ax = gx;
int ay = gy;
int az = gz;
int bx = gx;
int by = gy;
int bz = gz;

if (psc(adims) == 3)
{
@@ -99,37 +97,36 @@ void main()
if (psc(bw) == 1 && psc(bh) == 1)
{
// special type 1
v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz));
v2 = image3d_ld1(b_blob_3d, ivec3(0, 0, gz));
bx = 0;
by = 0;
}

if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1)
{
// special type 2
v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz));
v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, 0));
bz = 0;
}

if (psc(aw) == 1 && psc(ah) == 1)
{
// special type 3
v1 = image3d_ld1(a_blob_3d, ivec3(0, 0, gz));
v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz));
ax = 0;
ay = 0;
}

if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1)
{
// special type 4
v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, 0));
v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz));
az = 0;
}
}

if (psc(bdims) == 2)
{
// type 18
v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz));
v2 = image2d_ld1(b_blob_2d, ivec2(gy, gz));
bx = gy;
by = gz;
bz = 0;
}

if (psc(bdims) == 1)
@@ -137,14 +134,16 @@ void main()
if (psc(bw) == 1)
{
// type 16
v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz));
v2 = image1d_ld1(b_blob_1d, 0);
bx = 0;
by = 0;
bz = 0;
}
else
{
// type 17
v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz));
v2 = image1d_ld1(b_blob_1d, gz);
bx = gz;
by = 0;
bz = 0;
}
}
}
@@ -153,8 +152,9 @@ void main()
if (psc(bdims) == 3)
{
// type 14
v1 = image2d_ld1(a_blob_2d, ivec2(gy, gz));
v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz));
ax = gy;
ay = gz;
az = 0;
}

if (psc(bdims) == 1)
@@ -162,67 +162,61 @@ void main()
if (psc(bw) == 1)
{
// type 11
v1 = image2d_ld1(a_blob_2d, ivec2(gx, gy));
v2 = image1d_ld1(b_blob_1d, 0);
bx = 0;
by = 0;
bz = 0;
}
else
{
// type 12
v1 = image2d_ld1(a_blob_2d, ivec2(gx, gy));
v2 = image1d_ld1(b_blob_1d, gy);
bx = gy;
by = 0;
bz = 0;
}
}
}
else if (psc(adims) == 1)
{
if (psc(bdims) == 3)
if (psc(aw) == 1)
{
if (psc(aw) == 1)
{
// type 4
v1 = image1d_ld1(a_blob_1d, 0);
v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz));
}
else
{
// type 9
v1 = image1d_ld1(a_blob_1d, gz);
v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz));
}
// type 2 3 4
ax = 0;
ay = 0;
az = 0;
}

if (psc(bdims) == 2)
else
{
if (psc(aw) == 1)
if (psc(bdims) == 3)
{
// type 3
v1 = image1d_ld1(a_blob_1d, 0);
v2 = image2d_ld1(b_blob_2d, ivec2(gx, gy));
// type 9
ax = gz;
ay = 0;
az = 0;
}
else

if (psc(bdims) == 2)
{
// type 8
v1 = image1d_ld1(a_blob_1d, gy);
v2 = image2d_ld1(b_blob_2d, ivec2(gx, gy));
ax = gy;
ay = 0;
az = 0;
}
}

if (psc(bdims) == 1)
{
if (psc(aw) == 1)
{
// type 2
v1 = image1d_ld1(a_blob_1d, 0);
v2 = image1d_ld1(b_blob_1d, gx);
}
else // if (psc(bw) == 1)
if (psc(bdims) == 1)
{
// type 6
v1 = image1d_ld1(a_blob_1d, gx);
v2 = image1d_ld1(b_blob_1d, 0);
if (psc(bw) == 1)
{
// type 6
bx = 0;
by = 0;
bz = 0;
}
}
}
}

afp v1 = image3d_ld1(a_blob_3d, ivec3(ax, ay, az));
afp v2 = image3d_ld1(b_blob_3d, ivec3(bx, by, bz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

@@ -363,18 +357,7 @@ void main()
if (op_type == 8) res = v2 / v1;

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st1(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st1(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st1(top_blob_data, gi, res);
#endif


+ 3
- 31
src/layer/vulkan/shader/binaryop_broadcast_a1_pack4.comp View File

@@ -43,13 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D a_blob_1d;
layout (binding = 0) uniform unfp sampler3D a_blob_3d;
layout (binding = 1) uniform unfp sampler1D b_blob_1d;
layout (binding = 1) uniform unfp sampler2D b_blob_2d;
layout (binding = 1) uniform unfp sampler3D b_blob_3d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer a_blob { sfp a_blob_data[]; };
@@ -89,7 +84,6 @@ void main()

#if NCNN_image_shader
afpvec4 v1;
afpvec4 v2;

if (psc(adims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1)
{
@@ -98,21 +92,10 @@ void main()
}
else
{
v1 = afpvec4(image1d_ld1(a_blob_1d, 0));
v1 = afpvec4(image3d_ld1(a_blob_3d, ivec3(0, 0, 0)));
}

if (psc(bdims) == 1)
{
v2 = image1d_ld4(b_blob_1d, gx);
}
else if (psc(bdims) == 2)
{
v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy));
}
else // if (psc(bdims) == 3)
{
v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz));
}
afpvec4 v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

@@ -142,18 +125,7 @@ void main()
if (op_type == 8) res = v2 / v1;

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st4(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st4(top_blob_data, gi, res);
#endif


+ 3
- 31
src/layer/vulkan/shader/binaryop_broadcast_a1_pack8.comp View File

@@ -44,13 +44,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D a_blob_1d;
layout (binding = 0) uniform unfp sampler3D a_blob_3d;
layout (binding = 1) uniform unfp sampler1D b_blob_1d;
layout (binding = 1) uniform unfp sampler2D b_blob_2d;
layout (binding = 1) uniform unfp sampler3D b_blob_3d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer a_blob { sfp a_blob_data[]; };
@@ -90,7 +85,6 @@ void main()

#if NCNN_image_shader
afpvec4 v1;
afpvec8 v2;

if (psc(adims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1)
{
@@ -99,21 +93,10 @@ void main()
}
else
{
v1 = afpvec4(image1d_ld1(a_blob_1d, 0));
v1 = afpvec4(image3d_ld1(a_blob_3d, ivec3(0, 0, 0)));
}

if (psc(bdims) == 1)
{
v2 = image1d_ld8(b_blob_1d, gx);
}
else if (psc(bdims) == 2)
{
v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy));
}
else // if (psc(bdims) == 3)
{
v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz));
}
afpvec8 v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

@@ -179,18 +162,7 @@ void main()
}

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st8(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st8(top_blob_data, gi, res);
#endif


+ 3
- 31
src/layer/vulkan/shader/binaryop_broadcast_b1_pack4.comp View File

@@ -43,13 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D a_blob_1d;
layout (binding = 0) uniform unfp sampler2D a_blob_2d;
layout (binding = 0) uniform unfp sampler3D a_blob_3d;
layout (binding = 1) uniform unfp sampler1D b_blob_1d;
layout (binding = 1) uniform unfp sampler3D b_blob_3d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer a_blob { sfpvec4 a_blob_data[]; };
@@ -88,7 +83,6 @@ void main()
return;

#if NCNN_image_shader
afpvec4 v1;
afpvec4 v2;

if (psc(bdims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1)
@@ -98,21 +92,10 @@ void main()
}
else
{
v2 = afpvec4(image1d_ld1(b_blob_1d, 0));
v2 = afpvec4(image3d_ld1(b_blob_3d, ivec3(0, 0, 0)));
}

if (psc(adims) == 1)
{
v1 = image1d_ld4(a_blob_1d, gx);
}
else if (psc(adims) == 2)
{
v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy));
}
else // if (psc(adims) == 3)
{
v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz));
}
afpvec4 v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

@@ -142,18 +125,7 @@ void main()
if (op_type == 8) res = v2 / v1;

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st4(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st4(top_blob_data, gi, res);
#endif


+ 3
- 31
src/layer/vulkan/shader/binaryop_broadcast_b1_pack8.comp View File

@@ -44,13 +44,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D a_blob_1d;
layout (binding = 0) uniform unfp sampler2D a_blob_2d;
layout (binding = 0) uniform unfp sampler3D a_blob_3d;
layout (binding = 1) uniform unfp sampler1D b_blob_1d;
layout (binding = 1) uniform unfp sampler3D b_blob_3d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer a_blob { sfpvec8 a_blob_data[]; };
@@ -89,7 +84,6 @@ void main()
return;

#if NCNN_image_shader
afpvec8 v1;
afpvec4 v2;

if (psc(bdims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1)
@@ -99,21 +93,10 @@ void main()
}
else
{
v2 = afpvec4(image1d_ld1(b_blob_1d, 0));
v2 = afpvec4(image3d_ld1(b_blob_3d, ivec3(0, 0, 0)));
}

if (psc(adims) == 1)
{
v1 = image1d_ld8(a_blob_1d, gx);
}
else if (psc(adims) == 2)
{
v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy));
}
else // if (psc(adims) == 3)
{
v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz));
}
afpvec8 v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

@@ -179,18 +162,7 @@ void main()
}

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st8(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st8(top_blob_data, gi, res);
#endif


+ 58
- 75
src/layer/vulkan/shader/binaryop_broadcast_pack4.comp View File

@@ -43,14 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D a_blob_1d;
layout (binding = 0) uniform unfp sampler2D a_blob_2d;
layout (binding = 0) uniform unfp sampler3D a_blob_3d;
layout (binding = 1) uniform unfp sampler1D b_blob_1d;
layout (binding = 1) uniform unfp sampler2D b_blob_2d;
layout (binding = 1) uniform unfp sampler3D b_blob_3d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer a_blob { sfpvec4 a_blob_data[]; };
@@ -89,8 +83,12 @@ void main()
return;

#if NCNN_image_shader
afpvec4 v1;
afpvec4 v2;
int ax = gx;
int ay = gy;
int az = gz;
int bx = gx;
int by = gy;
int bz = gz;

if (psc(adims) == 3)
{
@@ -99,37 +97,36 @@ void main()
if (psc(bw) == 1 && psc(bh) == 1)
{
// special type 1
v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz));
v2 = image3d_ld4(b_blob_3d, ivec3(0, 0, gz));
bx = 0;
by = 0;
}

if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1)
{
// special type 2
v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz));
v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, 0));
bz = 0;
}

if (psc(aw) == 1 && psc(ah) == 1)
{
// special type 3
v1 = image3d_ld4(a_blob_3d, ivec3(0, 0, gz));
v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz));
ax = 0;
ay = 0;
}

if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1)
{
// special type 4
v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, 0));
v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz));
az = 0;
}
}

if (psc(bdims) == 2)
{
// type 18
v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz));
v2 = image2d_ld4(b_blob_2d, ivec2(gy, gz));
bx = gy;
by = gz;
bz = 0;
}

if (psc(bdims) == 1)
@@ -137,14 +134,16 @@ void main()
if (psc(bw) == 1)
{
// type 16
v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz));
v2 = image1d_ld4(b_blob_1d, 0);
bx = 0;
by = 0;
bz = 0;
}
else
{
// type 17
v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz));
v2 = image1d_ld4(b_blob_1d, gz);
bx = gz;
by = 0;
bz = 0;
}
}
}
@@ -153,8 +152,9 @@ void main()
if (psc(bdims) == 3)
{
// type 14
v1 = image2d_ld4(a_blob_2d, ivec2(gy, gz));
v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz));
ax = gy;
ay = gz;
az = 0;
}

if (psc(bdims) == 1)
@@ -162,67 +162,61 @@ void main()
if (psc(bw) == 1)
{
// type 11
v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy));
v2 = image1d_ld4(b_blob_1d, 0);
bx = 0;
by = 0;
bz = 0;
}
else
{
// type 12
v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy));
v2 = image1d_ld4(b_blob_1d, gy);
bx = gy;
by = 0;
bz = 0;
}
}
}
else if (psc(adims) == 1)
{
if (psc(bdims) == 3)
if (psc(aw) == 1)
{
if (psc(aw) == 1)
{
// type 4
v1 = image1d_ld4(a_blob_1d, 0);
v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz));
}
else
{
// type 9
v1 = image1d_ld4(a_blob_1d, gz);
v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz));
}
// type 2 3 4
ax = 0;
ay = 0;
az = 0;
}

if (psc(bdims) == 2)
else
{
if (psc(aw) == 1)
if (psc(bdims) == 3)
{
// type 3
v1 = image1d_ld4(a_blob_1d, 0);
v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy));
// type 9
ax = gz;
ay = 0;
az = 0;
}
else

if (psc(bdims) == 2)
{
// type 8
v1 = image1d_ld4(a_blob_1d, gy);
v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy));
ax = gy;
ay = 0;
az = 0;
}
}

if (psc(bdims) == 1)
{
if (psc(aw) == 1)
if (psc(bdims) == 1)
{
// type 2
v1 = image1d_ld4(a_blob_1d, 0);
v2 = image1d_ld4(b_blob_1d, gx);
}
else // if (psc(bw) == 1)
{
// type 6
v1 = image1d_ld4(a_blob_1d, gx);
v2 = image1d_ld4(b_blob_1d, 0);
if (psc(bw) == 1)
{
// type 6
bx = 0;
by = 0;
bz = 0;
}
}
}
}

afpvec4 v1 = image3d_ld4(a_blob_3d, ivec3(ax, ay, az));
afpvec4 v2 = image3d_ld4(b_blob_3d, ivec3(bx, by, bz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

@@ -312,18 +306,7 @@ void main()
if (op_type == 8) res = v2 / v1;

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st4(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st4(top_blob_data, gi, res);
#endif


+ 58
- 75
src/layer/vulkan/shader/binaryop_broadcast_pack8.comp View File

@@ -44,14 +44,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D a_blob_1d;
layout (binding = 0) uniform unfp sampler2D a_blob_2d;
layout (binding = 0) uniform unfp sampler3D a_blob_3d;
layout (binding = 1) uniform unfp sampler1D b_blob_1d;
layout (binding = 1) uniform unfp sampler2D b_blob_2d;
layout (binding = 1) uniform unfp sampler3D b_blob_3d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer a_blob { sfpvec8 a_blob_data[]; };
@@ -90,8 +84,12 @@ void main()
return;

#if NCNN_image_shader
afpvec8 v1;
afpvec8 v2;
int ax = gx;
int ay = gy;
int az = gz;
int bx = gx;
int by = gy;
int bz = gz;

if (psc(adims) == 3)
{
@@ -100,37 +98,36 @@ void main()
if (psc(bw) == 1 && psc(bh) == 1)
{
// special type 1
v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz));
v2 = image3d_ld8(b_blob_3d, ivec3(0, 0, gz));
bx = 0;
by = 0;
}

if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1)
{
// special type 2
v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz));
v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, 0));
bz = 0;
}

if (psc(aw) == 1 && psc(ah) == 1)
{
// special type 3
v1 = image3d_ld8(a_blob_3d, ivec3(0, 0, gz));
v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz));
ax = 0;
ay = 0;
}

if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1)
{
// special type 4
v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, 0));
v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz));
az = 0;
}
}

if (psc(bdims) == 2)
{
// type 18
v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz));
v2 = image2d_ld8(b_blob_2d, ivec2(gy, gz));
bx = gy;
by = gz;
bz = 0;
}

if (psc(bdims) == 1)
@@ -138,14 +135,16 @@ void main()
if (psc(bw) == 1)
{
// type 16
v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz));
v2 = image1d_ld8(b_blob_1d, 0);
bx = 0;
by = 0;
bz = 0;
}
else
{
// type 17
v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz));
v2 = image1d_ld8(b_blob_1d, gz);
bx = gz;
by = 0;
bz = 0;
}
}
}
@@ -154,8 +153,9 @@ void main()
if (psc(bdims) == 3)
{
// type 14
v1 = image2d_ld8(a_blob_2d, ivec2(gy, gz));
v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz));
ax = gy;
ay = gz;
az = 0;
}

if (psc(bdims) == 1)
@@ -163,67 +163,61 @@ void main()
if (psc(bw) == 1)
{
// type 11
v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy));
v2 = image1d_ld8(b_blob_1d, 0);
bx = 0;
by = 0;
bz = 0;
}
else
{
// type 12
v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy));
v2 = image1d_ld8(b_blob_1d, gy);
bx = gy;
by = 0;
bz = 0;
}
}
}
else if (psc(adims) == 1)
{
if (psc(bdims) == 3)
if (psc(aw) == 1)
{
if (psc(aw) == 1)
{
// type 4
v1 = image1d_ld8(a_blob_1d, 0);
v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz));
}
else
{
// type 9
v1 = image1d_ld8(a_blob_1d, gz);
v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz));
}
// type 2 3 4
ax = 0;
ay = 0;
az = 0;
}

if (psc(bdims) == 2)
else
{
if (psc(aw) == 1)
if (psc(bdims) == 3)
{
// type 3
v1 = image1d_ld8(a_blob_1d, 0);
v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy));
// type 9
ax = gz;
ay = 0;
az = 0;
}
else

if (psc(bdims) == 2)
{
// type 8
v1 = image1d_ld8(a_blob_1d, gy);
v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy));
ax = gy;
ay = 0;
az = 0;
}
}

if (psc(bdims) == 1)
{
if (psc(aw) == 1)
if (psc(bdims) == 1)
{
// type 2
v1 = image1d_ld8(a_blob_1d, 0);
v2 = image1d_ld8(b_blob_1d, gx);
}
else // if (psc(bw) == 1)
{
// type 6
v1 = image1d_ld8(a_blob_1d, gx);
v2 = image1d_ld8(b_blob_1d, 0);
if (psc(bw) == 1)
{
// type 6
bx = 0;
by = 0;
bz = 0;
}
}
}
}

afpvec8 v1 = image3d_ld8(a_blob_3d, ivec3(ax, ay, az));
afpvec8 v2 = image3d_ld8(b_blob_3d, ivec3(bx, by, bz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

@@ -349,18 +343,7 @@ void main()
}

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st8(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st8(top_blob_data, gi, res);
#endif


+ 4
- 58
src/layer/vulkan/shader/binaryop_pack4.comp View File

@@ -45,14 +45,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D a_blob_1d;
layout (binding = 0) uniform unfp sampler2D a_blob_2d;
layout (binding = 0) uniform unfp sampler3D a_blob_3d;
layout (binding = 1) uniform unfp sampler1D b_blob_1d;
layout (binding = 1) uniform unfp sampler2D b_blob_2d;
layout (binding = 1) uniform unfp sampler3D b_blob_3d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer a_blob { sfpvec4 a_blob_data[]; };
@@ -91,20 +85,7 @@ void main()
return;

#if NCNN_image_shader
afpvec4 v1;

if (psc(outdims) == 1)
{
v1 = image1d_ld4(a_blob_1d, gx);
}
else if (psc(outdims) == 2)
{
v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy));
}
else // if (psc(outdims) == 3)
{
v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz));
}
afpvec4 v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

@@ -129,18 +110,7 @@ void main()
if (op_type == 8) res = b / v1;

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st4(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st4(a_blob_data, gi, res);
#endif
@@ -149,20 +119,7 @@ void main()
{
// type 7 13 19
#if NCNN_image_shader
afpvec4 v2;

if (psc(outdims) == 1)
{
v2 = image1d_ld4(b_blob_1d, gx);
}
else if (psc(outdims) == 2)
{
v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy));
}
else // if (psc(outdims) == 3)
{
v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz));
}
afpvec4 v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz));
#else
afpvec4 v2 = buffer_ld4(b_blob_data, gi);
#endif
@@ -178,18 +135,7 @@ void main()
if (op_type == 8) res = v2 / v1;

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st4(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st4(top_blob_data, gi, res);
#endif


+ 4
- 58
src/layer/vulkan/shader/binaryop_pack8.comp View File

@@ -46,14 +46,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D a_blob_1d;
layout (binding = 0) uniform unfp sampler2D a_blob_2d;
layout (binding = 0) uniform unfp sampler3D a_blob_3d;
layout (binding = 1) uniform unfp sampler1D b_blob_1d;
layout (binding = 1) uniform unfp sampler2D b_blob_2d;
layout (binding = 1) uniform unfp sampler3D b_blob_3d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer a_blob { sfpvec8 a_blob_data[]; };
@@ -92,20 +86,7 @@ void main()
return;

#if NCNN_image_shader
afpvec8 v1;

if (psc(outdims) == 1)
{
v1 = image1d_ld8(a_blob_1d, gx);
}
else if (psc(outdims) == 2)
{
v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy));
}
else // if (psc(outdims) == 3)
{
v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz));
}
afpvec8 v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

@@ -166,18 +147,7 @@ void main()
}

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st8(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st8(a_blob_data, gi, res);
#endif
@@ -186,20 +156,7 @@ void main()
{
// type 7 13 19
#if NCNN_image_shader
afpvec8 v2;

if (psc(outdims) == 1)
{
v2 = image1d_ld8(b_blob_1d, gx);
}
else if (psc(outdims) == 2)
{
v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy));
}
else // if (psc(outdims) == 3)
{
v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz));
}
afpvec8 v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz));
#else
afpvec8 v2 = buffer_ld8(b_blob_data, gi);
#endif
@@ -251,18 +208,7 @@ void main()
}

#if NCNN_image_shader
if (psc(outdims) == 1)
{
image1d_st8(top_blob_1d, gx, res);
}
else if (psc(outdims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(outdims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st8(top_blob_data, gi, res);
#endif


+ 1
- 16
src/layer/vulkan/shader/cast_fp16_to_fp32.comp View File

@@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, r32f) writeonly uniform highp image1D top_blob_1d;
layout (binding = 1, r32f) writeonly uniform highp image2D top_blob_2d;
layout (binding = 1, r32f) writeonly uniform highp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
@@ -71,18 +67,7 @@ void main()
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp1(top_blob_1d, gx, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;



+ 1
- 16
src/layer/vulkan/shader/cast_fp16_to_fp32_pack4.comp View File

@@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, rgba32f) writeonly uniform highp image1D top_blob_1d;
layout (binding = 1, rgba32f) writeonly uniform highp image2D top_blob_2d;
layout (binding = 1, rgba32f) writeonly uniform highp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
@@ -71,18 +67,7 @@ void main()
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp4(top_blob_1d, gx, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;



+ 1
- 16
src/layer/vulkan/shader/cast_fp16_to_fp32_pack8.comp View File

@@ -36,11 +36,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, rgba32f) writeonly uniform highp image1D top_blob_1d;
layout (binding = 1, rgba32f) writeonly uniform highp image2D top_blob_2d;
layout (binding = 1, rgba32f) writeonly uniform highp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
@@ -72,18 +68,7 @@ void main()
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp8(top_blob_1d, gx, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;



+ 1
- 16
src/layer/vulkan/shader/cast_fp32_to_fp16.comp View File

@@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform highp sampler1D bottom_blob_1d;
layout (binding = 0) uniform highp sampler2D bottom_blob_2d;
layout (binding = 0) uniform highp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { float bottom_blob_data[]; };
@@ -71,18 +67,7 @@ void main()
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp1(top_blob_1d, gx, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;



+ 1
- 16
src/layer/vulkan/shader/cast_fp32_to_fp16_pack4.comp View File

@@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform highp sampler1D bottom_blob_1d;
layout (binding = 0) uniform highp sampler2D bottom_blob_2d;
layout (binding = 0) uniform highp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { vec4 bottom_blob_data[]; };
@@ -71,18 +67,7 @@ void main()
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp4(top_blob_1d, gx, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;



+ 1
- 16
src/layer/vulkan/shader/cast_fp32_to_fp16_pack8.comp View File

@@ -36,11 +36,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform highp sampler1D bottom_blob_1d;
layout (binding = 0) uniform highp sampler2D bottom_blob_2d;
layout (binding = 0) uniform highp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { mat2x4 bottom_blob_data[]; };
@@ -72,18 +68,7 @@ void main()
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp8(top_blob_1d, gx, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;



+ 2
- 29
src/layer/vulkan/shader/clip.comp View File

@@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
@@ -61,19 +57,7 @@ void main()
return;

#if NCNN_image_shader
afp v;
if (psc(dims) == 1)
{
v = image1d_ld1(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
}
afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -83,18 +67,7 @@ void main()
v = clamp(v, afp(const_min), afp(const_max));

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st1(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st1(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st1(bottom_top_blob_data, gi, v);
#endif


+ 2
- 29
src/layer/vulkan/shader/clip_pack4.comp View File

@@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };
@@ -61,19 +57,7 @@ void main()
return;

#if NCNN_image_shader
afpvec4 v;
if (psc(dims) == 1)
{
v = image1d_ld4(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
}
afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -83,18 +67,7 @@ void main()
v = clamp(v, afp(const_min), afp(const_max));

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st4(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st4(bottom_top_blob_data, gi, v);
#endif


+ 2
- 29
src/layer/vulkan/shader/clip_pack8.comp View File

@@ -33,11 +33,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };
@@ -62,19 +58,7 @@ void main()
return;

#if NCNN_image_shader
afpvec8 v;
if (psc(dims) == 1)
{
v = image1d_ld8(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));
}
afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -85,18 +69,7 @@ void main()
v[1] = clamp(v[1], afp(const_min), afp(const_max));

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st8(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st8(bottom_top_blob_data, gi, v);
#endif


+ 5
- 23
src/layer/vulkan/shader/concat.comp View File

@@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
@@ -74,29 +70,15 @@ void main()
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp1(top_blob_1d, gx + p.offset, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
if (axis == 0) image2d_cp1(top_blob_2d, ivec2(gx, gy + p.offset), bottom_blob_2d, ivec2(gx, gy));
if (axis == 1) image2d_cp1(top_blob_2d, ivec2(gx + p.offset, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
if (axis == 0) image3d_cp1(top_blob_3d, ivec3(gx, gy, gz + p.offset), bottom_blob_3d, ivec3(gx, gy, gz));
if (axis == 1) image3d_cp1(top_blob_3d, ivec3(gx, gy + p.offset, gz), bottom_blob_3d, ivec3(gx, gy, gz));
if (axis == 2) image3d_cp1(top_blob_3d, ivec3(gx + p.offset, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

ivec3 gxyz = ivec3(gx, gy, gz);

gxyz[psc(dims) - 1 - axis] += p.offset;

#if NCNN_image_shader
image3d_cp1(top_blob_3d, gxyz, bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

int v_offset = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x;

buffer_cp1(top_blob_data, v_offset, bottom_blob_data, gi);


+ 5
- 23
src/layer/vulkan/shader/concat_pack4.comp View File

@@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
@@ -74,29 +70,15 @@ void main()
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp4(top_blob_1d, gx + p.offset, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
if (axis == 0) image2d_cp4(top_blob_2d, ivec2(gx, gy + p.offset), bottom_blob_2d, ivec2(gx, gy));
if (axis == 1) image2d_cp4(top_blob_2d, ivec2(gx + p.offset, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
if (axis == 0) image3d_cp4(top_blob_3d, ivec3(gx, gy, gz + p.offset), bottom_blob_3d, ivec3(gx, gy, gz));
if (axis == 1) image3d_cp4(top_blob_3d, ivec3(gx, gy + p.offset, gz), bottom_blob_3d, ivec3(gx, gy, gz));
if (axis == 2) image3d_cp4(top_blob_3d, ivec3(gx + p.offset, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

ivec3 gxyz = ivec3(gx, gy, gz);

gxyz[psc(dims) - 1 - axis] += p.offset;

#if NCNN_image_shader
image3d_cp4(top_blob_3d, gxyz, bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

int v_offset = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x;

buffer_cp4(top_blob_data, v_offset, bottom_blob_data, gi);


+ 31
- 51
src/layer/vulkan/shader/concat_pack4to1.comp View File

@@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
@@ -74,81 +70,65 @@ void main()
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

ivec3 gxyz = ivec3(gx, gy, gz);

gxyz[psc(dims) - 1] *= 4;
gxyz[psc(dims) - 1 - axis] += p.offset;

#if NCNN_image_shader
afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));

if (psc(dims) == 1)
{
afpvec4 v = image1d_ld4(bottom_blob_1d, gx);

int gx4 = gx * 4 + p.offset;

image1d_st1(top_blob_1d, gx4 + 0, v.r);
image1d_st1(top_blob_1d, gx4 + 1, v.g);
image1d_st1(top_blob_1d, gx4 + 2, v.b);
image1d_st1(top_blob_1d, gx4 + 3, v.a);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, 0, 0), v.r);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, 0, 0), v.g);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, 0, 0), v.b);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, 0, 0), v.a);
}
else if (psc(dims) == 2)
{
afpvec4 v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy));

if (axis == 0)
{
int gy4 = gy * 4 + p.offset;

image2d_st1(top_blob_2d, ivec2(gx, gy4 + 0), v.r);
image2d_st1(top_blob_2d, ivec2(gx, gy4 + 1), v.g);
image2d_st1(top_blob_2d, ivec2(gx, gy4 + 2), v.b);
image2d_st1(top_blob_2d, ivec2(gx, gy4 + 3), v.a);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0), v.r);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0), v.g);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, 0), v.b);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, 0), v.a);
}
if (axis == 1)
{
int gx4 = gx * 4 + p.offset;

image2d_st1(top_blob_2d, ivec2(gx4 + 0, gy), v.r);
image2d_st1(top_blob_2d, ivec2(gx4 + 1, gy), v.g);
image2d_st1(top_blob_2d, ivec2(gx4 + 2, gy), v.b);
image2d_st1(top_blob_2d, ivec2(gx4 + 3, gy), v.a);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0), v.r);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0), v.g);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, 0), v.b);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, 0), v.a);
}
}
else // if (psc(dims) == 3)
{
afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));

if (axis == 0)
{
int gz4 = gz * 4 + p.offset;

image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 0), v.r);
image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 1), v.g);
image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 2), v.b);
image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 3), v.a);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0), v.r);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1), v.g);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 2), v.b);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 3), v.a);
}
if (axis == 1)
{
int gy4 = gy * 4 + p.offset;

image3d_st1(top_blob_3d, ivec3(gx, gy4 + 0, gz), v.r);
image3d_st1(top_blob_3d, ivec3(gx, gy4 + 1, gz), v.g);
image3d_st1(top_blob_3d, ivec3(gx, gy4 + 2, gz), v.b);
image3d_st1(top_blob_3d, ivec3(gx, gy4 + 3, gz), v.a);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z), v.r);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z), v.g);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, gxyz.z), v.b);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, gxyz.z), v.a);
}
if (axis == 2)
{
int gx4 = gx * 4 + p.offset;

image3d_st1(top_blob_3d, ivec3(gx4 + 0, gy, gz), v.r);
image3d_st1(top_blob_3d, ivec3(gx4 + 1, gy, gz), v.g);
image3d_st1(top_blob_3d, ivec3(gx4 + 2, gy, gz), v.b);
image3d_st1(top_blob_3d, ivec3(gx4 + 3, gy, gz), v.a);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z), v.r);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z), v.g);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, gxyz.z), v.b);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, gxyz.z), v.a);
}
}
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

ivec3 gxyz = ivec3(gx, gy, gz);

gxyz[psc(dims) - 1] *= 4;
gxyz[psc(dims) - 1 - axis] += p.offset;

int v_offset_0 = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x;

ivec3 gxyz4 = ivec3(1, psc(outw), psc(outcstep));


+ 5
- 23
src/layer/vulkan/shader/concat_pack8.comp View File

@@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
@@ -75,29 +71,15 @@ void main()
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp8(top_blob_1d, gx + p.offset, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
if (axis == 0) image2d_cp8(top_blob_2d, ivec2(gx, gy + p.offset), bottom_blob_2d, ivec2(gx, gy));
if (axis == 1) image2d_cp8(top_blob_2d, ivec2(gx + p.offset, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
if (axis == 0) image3d_cp8(top_blob_3d, ivec3(gx, gy, gz + p.offset), bottom_blob_3d, ivec3(gx, gy, gz));
if (axis == 1) image3d_cp8(top_blob_3d, ivec3(gx, gy + p.offset, gz), bottom_blob_3d, ivec3(gx, gy, gz));
if (axis == 2) image3d_cp8(top_blob_3d, ivec3(gx + p.offset, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

ivec3 gxyz = ivec3(gx, gy, gz);

gxyz[psc(dims) - 1 - axis] += p.offset;

#if NCNN_image_shader
image3d_cp8(top_blob_3d, gxyz, bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

int v_offset = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x;

buffer_cp8(top_blob_data, v_offset, bottom_blob_data, gi);


+ 55
- 75
src/layer/vulkan/shader/concat_pack8to1.comp View File

@@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
@@ -75,105 +71,89 @@ void main()
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

ivec3 gxyz = ivec3(gx, gy, gz);

gxyz[psc(dims) - 1] *= 8;
gxyz[psc(dims) - 1 - axis] += p.offset;

#if NCNN_image_shader
afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));

if (psc(dims) == 1)
{
afpvec8 v = image1d_ld8(bottom_blob_1d, gx);

int gx8 = gx * 8 + p.offset;

image1d_st1(top_blob_1d, gx8 + 0, v[0].r);
image1d_st1(top_blob_1d, gx8 + 1, v[0].g);
image1d_st1(top_blob_1d, gx8 + 2, v[0].b);
image1d_st1(top_blob_1d, gx8 + 3, v[0].a);
image1d_st1(top_blob_1d, gx8 + 4, v[1].r);
image1d_st1(top_blob_1d, gx8 + 5, v[1].g);
image1d_st1(top_blob_1d, gx8 + 6, v[1].b);
image1d_st1(top_blob_1d, gx8 + 7, v[1].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, 0, 0), v[0].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, 0, 0), v[0].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, 0, 0), v[0].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, 0, 0), v[0].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 4, 0, 0), v[1].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 5, 0, 0), v[1].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 6, 0, 0), v[1].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 7, 0, 0), v[1].a);
}
else if (psc(dims) == 2)
{
afpvec8 v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy));

if (axis == 0)
{
int gy8 = gy * 8 + p.offset;

image2d_st1(top_blob_2d, ivec2(gx, gy8 + 0), v[0].r);
image2d_st1(top_blob_2d, ivec2(gx, gy8 + 1), v[0].g);
image2d_st1(top_blob_2d, ivec2(gx, gy8 + 2), v[0].b);
image2d_st1(top_blob_2d, ivec2(gx, gy8 + 3), v[0].a);
image2d_st1(top_blob_2d, ivec2(gx, gy8 + 4), v[1].r);
image2d_st1(top_blob_2d, ivec2(gx, gy8 + 5), v[1].g);
image2d_st1(top_blob_2d, ivec2(gx, gy8 + 6), v[1].b);
image2d_st1(top_blob_2d, ivec2(gx, gy8 + 7), v[1].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0), v[0].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0), v[0].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, 0), v[0].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, 0), v[0].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 4, 0), v[1].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 5, 0), v[1].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 6, 0), v[1].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 7, 0), v[1].a);
}
if (axis == 1)
{
int gx8 = gx * 8 + p.offset;

image2d_st1(top_blob_2d, ivec2(gx8 + 0, gy), v[0].r);
image2d_st1(top_blob_2d, ivec2(gx8 + 1, gy), v[0].g);
image2d_st1(top_blob_2d, ivec2(gx8 + 2, gy), v[0].b);
image2d_st1(top_blob_2d, ivec2(gx8 + 3, gy), v[0].a);
image2d_st1(top_blob_2d, ivec2(gx8 + 4, gy), v[1].r);
image2d_st1(top_blob_2d, ivec2(gx8 + 5, gy), v[1].g);
image2d_st1(top_blob_2d, ivec2(gx8 + 6, gy), v[1].b);
image2d_st1(top_blob_2d, ivec2(gx8 + 7, gy), v[1].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0), v[0].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0), v[0].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, 0), v[0].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, 0), v[0].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 4, gxyz.y, 0), v[1].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 5, gxyz.y, 0), v[1].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 6, gxyz.y, 0), v[1].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 7, gxyz.y, 0), v[1].a);
}
}
else // if (psc(dims) == 3)
{
afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));

if (axis == 0)
{
int gz8 = gz * 8 + p.offset;

image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 0), v[0].r);
image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 1), v[0].g);
image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 2), v[0].b);
image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 3), v[0].a);
image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 4), v[1].r);
image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 5), v[1].g);
image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 6), v[1].b);
image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 7), v[1].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0), v[0].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1), v[0].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 2), v[0].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 3), v[0].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 4), v[1].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 5), v[1].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 6), v[1].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 7), v[1].a);
}
if (axis == 1)
{
int gy8 = gy * 8 + p.offset;

image3d_st1(top_blob_3d, ivec3(gx, gy8 + 0, gz), v[0].r);
image3d_st1(top_blob_3d, ivec3(gx, gy8 + 1, gz), v[0].g);
image3d_st1(top_blob_3d, ivec3(gx, gy8 + 2, gz), v[0].b);
image3d_st1(top_blob_3d, ivec3(gx, gy8 + 3, gz), v[0].a);
image3d_st1(top_blob_3d, ivec3(gx, gy8 + 4, gz), v[1].r);
image3d_st1(top_blob_3d, ivec3(gx, gy8 + 5, gz), v[1].g);
image3d_st1(top_blob_3d, ivec3(gx, gy8 + 6, gz), v[1].b);
image3d_st1(top_blob_3d, ivec3(gx, gy8 + 7, gz), v[1].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z), v[0].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z), v[0].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, gxyz.z), v[0].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, gxyz.z), v[0].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 4, gxyz.z), v[1].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 5, gxyz.z), v[1].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 6, gxyz.z), v[1].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 7, gxyz.z), v[1].a);
}
if (axis == 2)
{
int gx8 = gx * 8 + p.offset;

image3d_st1(top_blob_3d, ivec3(gx8 + 0, gy, gz), v[0].r);
image3d_st1(top_blob_3d, ivec3(gx8 + 1, gy, gz), v[0].g);
image3d_st1(top_blob_3d, ivec3(gx8 + 2, gy, gz), v[0].b);
image3d_st1(top_blob_3d, ivec3(gx8 + 3, gy, gz), v[0].a);
image3d_st1(top_blob_3d, ivec3(gx8 + 4, gy, gz), v[1].r);
image3d_st1(top_blob_3d, ivec3(gx8 + 5, gy, gz), v[1].g);
image3d_st1(top_blob_3d, ivec3(gx8 + 6, gy, gz), v[1].b);
image3d_st1(top_blob_3d, ivec3(gx8 + 7, gy, gz), v[1].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z), v[0].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z), v[0].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, gxyz.z), v[0].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, gxyz.z), v[0].a);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 4, gxyz.y, gxyz.z), v[1].r);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 5, gxyz.y, gxyz.z), v[1].g);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 6, gxyz.y, gxyz.z), v[1].b);
image3d_st1(top_blob_3d, ivec3(gxyz.x + 7, gxyz.y, gxyz.z), v[1].a);
}
}
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

ivec3 gxyz = ivec3(gx, gy, gz);

gxyz[psc(dims) - 1] *= 8;
gxyz[psc(dims) - 1 - axis] += p.offset;

int v_offset_0 = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x;

ivec3 gxyz4 = ivec3(1, psc(outw), psc(outcstep));


+ 19
- 39
src/layer/vulkan/shader/concat_pack8to4.comp View File

@@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
@@ -75,70 +71,54 @@ void main()
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

ivec3 gxyz = ivec3(gx, gy, gz);

gxyz[psc(dims) - 1] *= 2;
gxyz[psc(dims) - 1 - axis] += p.offset;

#if NCNN_image_shader
afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));

if (psc(dims) == 1)
{
afpvec8 v = image1d_ld8(bottom_blob_1d, gx);

int gx2 = gx * 2 + p.offset;

image1d_st4(top_blob_1d, gx2 + 0, v[0]);
image1d_st4(top_blob_1d, gx2 + 1, v[1]);
image3d_st4(top_blob_3d, ivec3(gxyz.x + 0, 0, 0), v[0]);
image3d_st4(top_blob_3d, ivec3(gxyz.x + 1, 0, 0), v[1]);

}
else if (psc(dims) == 2)
{
afpvec8 v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy));

if (axis == 0)
{
int gy2 = gy * 2 + p.offset;

image2d_st4(top_blob_2d, ivec2(gx, gy2 + 0), v[0]);
image2d_st4(top_blob_2d, ivec2(gx, gy2 + 1), v[1]);
image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0), v[0]);
image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0), v[1]);
}
if (axis == 1)
{
int gx2 = gx * 2 + p.offset;

image2d_st4(top_blob_2d, ivec2(gx2 + 0, gy), v[0]);
image2d_st4(top_blob_2d, ivec2(gx2 + 1, gy), v[1]);
image3d_st4(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0), v[0]);
image3d_st4(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0), v[1]);
}
}
else // if (psc(dims) == 3)
{
afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));

if (axis == 0)
{
int gz2 = gz * 2 + p.offset;

image3d_st4(top_blob_3d, ivec3(gx, gy, gz2 + 0), v[0]);
image3d_st4(top_blob_3d, ivec3(gx, gy, gz2 + 1), v[1]);
image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0), v[0]);
image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1), v[1]);
}
if (axis == 1)
{
int gy2 = gy * 2 + p.offset;

image3d_st4(top_blob_3d, ivec3(gx, gy2 + 0, gz), v[0]);
image3d_st4(top_blob_3d, ivec3(gx, gy2 + 1, gz), v[1]);
image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z), v[0]);
image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z), v[1]);
}
if (axis == 2)
{
int gx2 = gx * 2 + p.offset;

image3d_st4(top_blob_3d, ivec3(gx2 + 0, gy, gz), v[0]);
image3d_st4(top_blob_3d, ivec3(gx2 + 1, gy, gz), v[1]);
image3d_st4(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z), v[0]);
image3d_st4(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z), v[1]);
}
}
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

ivec3 gxyz = ivec3(gx, gy, gz);

gxyz[psc(dims) - 1] *= 2;
gxyz[psc(dims) - 1 - axis] += p.offset;

int v_offset_0 = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x;

ivec3 gxyz4 = ivec3(1, psc(outw), psc(outcstep));


+ 2
- 2
src/layer/vulkan/shader/convolution.comp View File

@@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -86,7 +86,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_1x1s1d1.comp View File

@@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
#if NCNN_fp16_packed
layout (binding = 0) readonly buffer bottom_blob { vec4 bottom_blob_data[]; };
@@ -100,7 +100,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = afpvec4(image1d_ld1(bias_blob, gz));
sum = afpvec4(image3d_ld1(bias_blob, ivec3(gz, 0, 0)));
#else
sum = afpvec4(buffer_ld1(bias_data, gz));
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack1to4.comp View File

@@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -86,7 +86,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack1to8.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack4.comp View File

@@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -91,7 +91,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack4_1x1s1d1.comp View File

@@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -103,7 +103,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
afpvec4 b = image1d_ld4(bias_blob, gz);
afpvec4 b = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
afpvec4 b = buffer_ld4(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack4_3x3s1d1_winograd23_transform_output.comp View File

@@ -40,7 +40,7 @@ layout (constant_id = shape_constant_id_offset + 6) const int outcstep = 0;
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D top_tm_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler1D bias_blob;
layout (binding = 2) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer top_tm_blob { sfpvec4 top_tm_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -129,7 +129,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
const afpvec4 bias_value = image1d_ld4(bias_blob, gz);
const afpvec4 bias_value = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
const afpvec4 bias_value = buffer_ld4(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack4to1.comp View File

@@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -86,7 +86,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack4to8.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack8.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack8_1x1s1d1.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -99,7 +99,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
afpvec8 b = image1d_ld8(bias_blob, gz);
afpvec8 b = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
afpvec8 b = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack8_3x3s1d1_winograd23_transform_output.comp View File

@@ -41,7 +41,7 @@ layout (constant_id = shape_constant_id_offset + 6) const int outcstep = 0;
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D top_tm_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler1D bias_blob;
layout (binding = 2) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer top_tm_blob { sfpvec8 top_tm_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -130,7 +130,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
const afpvec8 bias_value = image1d_ld8(bias_blob, gz);
const afpvec8 bias_value = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
const afpvec8 bias_value = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack8to1.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolution_pack8to4.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 4
- 4
src/layer/vulkan/shader/convolutiondepthwise.comp View File

@@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler2D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif
@@ -108,7 +108,7 @@ void main()

for (int x = 0; x < kernel_w; x++)
{
sum += image2d_ld1(weight_blob, ivec2(wx, gz)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz));
sum += image3d_ld1(weight_blob, ivec3(wx, gz, 0)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz));

sx += dilation_w;
wx += 1;


+ 2
- 2
src/layer/vulkan/shader/convolutiondepthwise_group.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolutiondepthwise_group_pack1to4.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolutiondepthwise_group_pack1to8.comp View File

@@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolutiondepthwise_group_pack4.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -92,7 +92,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolutiondepthwise_group_pack4to1.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolutiondepthwise_group_pack4to8.comp View File

@@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolutiondepthwise_group_pack8.comp View File

@@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolutiondepthwise_group_pack8to1.comp View File

@@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/convolutiondepthwise_group_pack8to4.comp View File

@@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 4
- 4
src/layer/vulkan/shader/convolutiondepthwise_pack4.comp View File

@@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler2D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif
@@ -110,7 +110,7 @@ void main()
{
afpvec4 v = image3d_ld4(bottom_blob, ivec3(sx, sy, gz));

afpvec4 k = image2d_ld4(weight_blob, ivec2(wx, gz));
afpvec4 k = image3d_ld4(weight_blob, ivec3(wx, gz, 0));

sum += v * k;



+ 4
- 4
src/layer/vulkan/shader/convolutiondepthwise_pack8.comp View File

@@ -50,8 +50,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler2D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif
@@ -111,7 +111,7 @@ void main()
{
afpvec8 v = image3d_ld8(bottom_blob, ivec3(sx, sy, gz));

afpvec8 k = image2d_ld8(weight_blob, ivec2(wx, gz));
afpvec8 k = image3d_ld8(weight_blob, ivec3(wx, gz, 0));

// sum += v * k;
sum[0] += v[0] * k[0];


+ 2
- 2
src/layer/vulkan/shader/deconvolution.comp View File

@@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -86,7 +86,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolution_pack1to4.comp View File

@@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -86,7 +86,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolution_pack1to8.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolution_pack4.comp View File

@@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -91,7 +91,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolution_pack4to1.comp View File

@@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -86,7 +86,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolution_pack4to8.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolution_pack8.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolution_pack8to1.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolution_pack8to4.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 4
- 4
src/layer/vulkan/shader/deconvolutiondepthwise.comp View File

@@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler2D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif
@@ -124,7 +124,7 @@ void main()

int wx = y * kernel_w + x;

sum += image2d_ld1(weight_blob, ivec2(wx, gz)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz));
sum += image3d_ld1(weight_blob, ivec3(wx, gz, 0)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz));
}
}
#else


+ 2
- 2
src/layer/vulkan/shader/deconvolutiondepthwise_group.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to4.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolutiondepthwise_group_pack1to8.comp View File

@@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -92,7 +92,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to1.comp View File

@@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolutiondepthwise_group_pack4to8.comp View File

@@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8.comp View File

@@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to1.comp View File

@@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld1(bias_blob, gz);
sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld1(bias_data, gz);
#endif


+ 2
- 2
src/layer/vulkan/shader/deconvolutiondepthwise_group_pack8to4.comp View File

@@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif


+ 4
- 4
src/layer/vulkan/shader/deconvolutiondepthwise_pack4.comp View File

@@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler2D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -87,7 +87,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld4(bias_blob, gz);
sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld4(bias_data, gz);
#endif
@@ -126,7 +126,7 @@ void main()

afpvec4 v = image3d_ld4(bottom_blob, ivec3(sx, sy, gz));

afpvec4 k = image2d_ld4(weight_blob, ivec2(wx, gz));
afpvec4 k = image3d_ld4(weight_blob, ivec3(wx, gz, 0));

sum += v * k;
}


+ 4
- 4
src/layer/vulkan/shader/deconvolutiondepthwise_pack8.comp View File

@@ -50,8 +50,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
layout (binding = 2) uniform unfp sampler2D weight_blob;
layout (binding = 3) uniform unfp sampler1D bias_blob;
layout (binding = 2) uniform unfp sampler3D weight_blob;
layout (binding = 3) uniform unfp sampler3D bias_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -88,7 +88,7 @@ void main()
if (bias_term == 1)
{
#if NCNN_image_shader
sum = image1d_ld8(bias_blob, gz);
sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0));
#else
sum = buffer_ld8(bias_data, gz);
#endif
@@ -127,7 +127,7 @@ void main()

afpvec8 v = image3d_ld8(bottom_blob, ivec3(sx, sy, gz));

afpvec8 k = image2d_ld8(weight_blob, ivec2(wx, gz));
afpvec8 k = image3d_ld8(weight_blob, ivec3(wx, gz, 0));

// sum += v * k;
sum[0] += v[0] * k[0];


+ 1
- 16
src/layer/vulkan/shader/deepcopy.comp View File

@@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
@@ -59,18 +55,7 @@ void main()
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp1(top_blob_1d, gx, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;



+ 1
- 16
src/layer/vulkan/shader/deepcopy_pack4.comp View File

@@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
@@ -59,18 +55,7 @@ void main()
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp4(top_blob_1d, gx, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;



+ 1
- 16
src/layer/vulkan/shader/deepcopy_pack8.comp View File

@@ -30,11 +30,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
@@ -60,18 +56,7 @@ void main()
return;

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_cp8(top_blob_1d, gx, bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
}
image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;



+ 2
- 29
src/layer/vulkan/shader/dropout.comp View File

@@ -31,11 +31,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
@@ -60,19 +56,7 @@ void main()
return;

#if NCNN_image_shader
afp v;
if (psc(dims) == 1)
{
v = image1d_ld1(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
}
afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -82,18 +66,7 @@ void main()
v *= afp(scale);

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st1(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st1(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st1(bottom_top_blob_data, gi, v);
#endif


+ 2
- 29
src/layer/vulkan/shader/dropout_pack4.comp View File

@@ -31,11 +31,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };
@@ -60,19 +56,7 @@ void main()
return;

#if NCNN_image_shader
afpvec4 v;
if (psc(dims) == 1)
{
v = image1d_ld4(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
}
afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -82,18 +66,7 @@ void main()
v *= afp(scale);

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st4(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st4(bottom_top_blob_data, gi, v);
#endif


+ 2
- 29
src/layer/vulkan/shader/dropout_pack8.comp View File

@@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };
@@ -61,19 +57,7 @@ void main()
return;

#if NCNN_image_shader
afpvec8 v;
if (psc(dims) == 1)
{
v = image1d_ld8(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));
}
afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -84,18 +68,7 @@ void main()
v[1] = v[1] * afp(scale);

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st8(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st8(bottom_top_blob_data, gi, v);
#endif


+ 3
- 35
src/layer/vulkan/shader/eltwise.comp View File

@@ -32,14 +32,8 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob1_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob1_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob1_3d;
layout (binding = 1) uniform unfp sampler1D bottom_blob2_1d;
layout (binding = 1) uniform unfp sampler2D bottom_blob2_2d;
layout (binding = 1) uniform unfp sampler3D bottom_blob2_3d;
layout (binding = 2, imfmtc1) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc1) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob1 { sfp bottom_blob1_data[]; };
@@ -69,23 +63,8 @@ void main()
return;

#if NCNN_image_shader
afp v1;
afp v2;
if (psc(dims) == 1)
{
v1 = image1d_ld1(bottom_blob1_1d, gx);
v2 = image1d_ld1(bottom_blob2_1d, gx);
}
else if (psc(dims) == 2)
{
v1 = image2d_ld1(bottom_blob1_2d, ivec2(gx, gy));
v2 = image2d_ld1(bottom_blob2_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v1 = image3d_ld1(bottom_blob1_3d, ivec3(gx, gy, gz));
v2 = image3d_ld1(bottom_blob2_3d, ivec3(gx, gy, gz));
}
afp v1 = image3d_ld1(bottom_blob1_3d, ivec3(gx, gy, gz));
afp v2 = image3d_ld1(bottom_blob2_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -119,18 +98,7 @@ void main()
}

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st1(top_blob_1d, gx, res);
}
else if (psc(dims) == 2)
{
image2d_st1(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(dims) == 3)
{
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st1(top_blob_data, gi, res);
#endif


+ 3
- 35
src/layer/vulkan/shader/eltwise_pack4.comp View File

@@ -32,14 +32,8 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob1_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob1_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob1_3d;
layout (binding = 1) uniform unfp sampler1D bottom_blob2_1d;
layout (binding = 1) uniform unfp sampler2D bottom_blob2_2d;
layout (binding = 1) uniform unfp sampler3D bottom_blob2_3d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob1 { sfpvec4 bottom_blob1_data[]; };
@@ -69,23 +63,8 @@ void main()
return;

#if NCNN_image_shader
afpvec4 v1;
afpvec4 v2;
if (psc(dims) == 1)
{
v1 = image1d_ld4(bottom_blob1_1d, gx);
v2 = image1d_ld4(bottom_blob2_1d, gx);
}
else if (psc(dims) == 2)
{
v1 = image2d_ld4(bottom_blob1_2d, ivec2(gx, gy));
v2 = image2d_ld4(bottom_blob2_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v1 = image3d_ld4(bottom_blob1_3d, ivec3(gx, gy, gz));
v2 = image3d_ld4(bottom_blob2_3d, ivec3(gx, gy, gz));
}
afpvec4 v1 = image3d_ld4(bottom_blob1_3d, ivec3(gx, gy, gz));
afpvec4 v2 = image3d_ld4(bottom_blob2_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -119,18 +98,7 @@ void main()
}

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st4(top_blob_1d, gx, res);
}
else if (psc(dims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(dims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st4(top_blob_data, gi, res);
#endif


+ 3
- 35
src/layer/vulkan/shader/eltwise_pack8.comp View File

@@ -33,14 +33,8 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob1_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob1_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob1_3d;
layout (binding = 1) uniform unfp sampler1D bottom_blob2_1d;
layout (binding = 1) uniform unfp sampler2D bottom_blob2_2d;
layout (binding = 1) uniform unfp sampler3D bottom_blob2_3d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob1 { sfpvec8 bottom_blob1_data[]; };
@@ -70,23 +64,8 @@ void main()
return;

#if NCNN_image_shader
afpvec8 v1;
afpvec8 v2;
if (psc(dims) == 1)
{
v1 = image1d_ld8(bottom_blob1_1d, gx);
v2 = image1d_ld8(bottom_blob2_1d, gx);
}
else if (psc(dims) == 2)
{
v1 = image2d_ld8(bottom_blob1_2d, ivec2(gx, gy));
v2 = image2d_ld8(bottom_blob2_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v1 = image3d_ld8(bottom_blob1_3d, ivec3(gx, gy, gz));
v2 = image3d_ld8(bottom_blob2_3d, ivec3(gx, gy, gz));
}
afpvec8 v1 = image3d_ld8(bottom_blob1_3d, ivec3(gx, gy, gz));
afpvec8 v2 = image3d_ld8(bottom_blob2_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -138,18 +117,7 @@ void main()
}

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st8(top_blob_1d, gx, res);
}
else if (psc(dims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), res);
}
else // if (psc(dims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res);
#else
buffer_st8(top_blob_data, gi, res);
#endif


+ 3
- 4
src/layer/vulkan/shader/flatten.comp View File

@@ -35,9 +35,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
@@ -78,14 +77,14 @@ void main()

if (psc(dims) == 2)
{
v = image2d_ld1(bottom_blob_2d, ivec2(x, y));
v = image3d_ld1(bottom_blob_3d, ivec3(x, y, 0));
}
else // if (psc(dims) == 3)
{
v = image3d_ld1(bottom_blob_3d, ivec3(x, y, z));
}

image1d_st1(top_blob, gx, v);
image3d_st1(top_blob, ivec3(gx, 0, 0), v);
#else
int v_offset = z * psc(cstep) + y * psc(w) + x;



+ 6
- 7
src/layer/vulkan/shader/flatten_pack1to4.comp View File

@@ -35,9 +35,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
@@ -77,10 +76,10 @@ void main()
ivec4 y4 = i4 / psc(w);
ivec4 x4 = i4 % psc(w);

v.r = image2d_ld1(bottom_blob_2d, ivec2(x4.r, y4.r));
v.g = image2d_ld1(bottom_blob_2d, ivec2(x4.g, y4.g));
v.b = image2d_ld1(bottom_blob_2d, ivec2(x4.b, y4.b));
v.a = image2d_ld1(bottom_blob_2d, ivec2(x4.a, y4.a));
v.r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, y4.r, 0));
v.g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, y4.g, 0));
v.b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, y4.b, 0));
v.a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, 0));
}
else // if (psc(dims) == 3)
{
@@ -96,7 +95,7 @@ void main()
v.a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, z4.a));
}

image1d_st4(top_blob, gx, v);
image3d_st4(top_blob, ivec3(gx, 0, 0), v);
#else
ivec4 v_offset;



+ 10
- 11
src/layer/vulkan/shader/flatten_pack1to8.comp View File

@@ -36,9 +36,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
@@ -82,14 +81,14 @@ void main()
ivec4 yy4 = ii4 / psc(w);
ivec4 xx4 = ii4 % psc(w);

v[0].r = image2d_ld1(bottom_blob_2d, ivec2(x4.r, y4.r));
v[0].g = image2d_ld1(bottom_blob_2d, ivec2(x4.g, y4.g));
v[0].b = image2d_ld1(bottom_blob_2d, ivec2(x4.b, y4.b));
v[0].a = image2d_ld1(bottom_blob_2d, ivec2(x4.a, y4.a));
v[1].r = image2d_ld1(bottom_blob_2d, ivec2(xx4.r, yy4.r));
v[1].g = image2d_ld1(bottom_blob_2d, ivec2(xx4.g, yy4.g));
v[1].b = image2d_ld1(bottom_blob_2d, ivec2(xx4.b, yy4.b));
v[1].a = image2d_ld1(bottom_blob_2d, ivec2(xx4.a, yy4.a));
v[0].r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, y4.r, 0));
v[0].g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, y4.g, 0));
v[0].b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, y4.b, 0));
v[0].a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, 0));
v[1].r = image3d_ld1(bottom_blob_3d, ivec3(xx4.r, yy4.r, 0));
v[1].g = image3d_ld1(bottom_blob_3d, ivec3(xx4.g, yy4.g, 0));
v[1].b = image3d_ld1(bottom_blob_3d, ivec3(xx4.b, yy4.b, 0));
v[1].a = image3d_ld1(bottom_blob_3d, ivec3(xx4.a, yy4.a, 0));
}
else // if (psc(dims) == 3)
{
@@ -113,7 +112,7 @@ void main()
v[1].a = image3d_ld1(bottom_blob_3d, ivec3(xx4.a, yy4.a, zz4.a));
}

image1d_st8(top_blob, gx, v);
image3d_st8(top_blob, ivec3(gx, 0, 0), v);
#else
ivec4 v_offset;
ivec4 vv_offset;


+ 6
- 7
src/layer/vulkan/shader/flatten_pack4.comp View File

@@ -35,9 +35,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
#else
#if NCNN_fp16_packed
layout (binding = 0) readonly buffer bottom_blob { sfpvec2 bottom_blob_data[]; };
@@ -81,10 +80,10 @@ void main()
ivec4 y4 = i4 / psc(w);
ivec4 x4 = i4 % psc(w);

afpvec4 v0 = image2d_ld4(bottom_blob_2d, ivec2(x4.r, y4.r / 4));
afpvec4 v1 = image2d_ld4(bottom_blob_2d, ivec2(x4.g, y4.g / 4));
afpvec4 v2 = image2d_ld4(bottom_blob_2d, ivec2(x4.b, y4.b / 4));
afpvec4 v3 = image2d_ld4(bottom_blob_2d, ivec2(x4.a, y4.a / 4));
afpvec4 v0 = image3d_ld4(bottom_blob_3d, ivec3(x4.r, y4.r / 4, 0));
afpvec4 v1 = image3d_ld4(bottom_blob_3d, ivec3(x4.g, y4.g / 4, 0));
afpvec4 v2 = image3d_ld4(bottom_blob_3d, ivec3(x4.b, y4.b / 4, 0));
afpvec4 v3 = image3d_ld4(bottom_blob_3d, ivec3(x4.a, y4.a / 4, 0));

v.r = v0[y4.r % 4];
v.g = v1[y4.g % 4];
@@ -110,7 +109,7 @@ void main()
v.a = v3[z4.a % 4];
}

image1d_st4(top_blob, gx, v);
image3d_st4(top_blob, ivec3(gx, 0, 0), v);
#else
#if NCNN_fp16_packed
ivec4 v_offset;


+ 10
- 11
src/layer/vulkan/shader/flatten_pack4to8.comp View File

@@ -36,9 +36,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
#else
#if NCNN_fp16_packed
layout (binding = 0) readonly buffer bottom_blob { sfpvec2 bottom_blob_data[]; };
@@ -85,14 +84,14 @@ void main()
ivec4 yy4 = ii4 / psc(w);
ivec4 xx4 = ii4 % psc(w);

afpvec4 v0 = image2d_ld4(bottom_blob_2d, ivec2(x4.r, y4.r / 4));
afpvec4 v1 = image2d_ld4(bottom_blob_2d, ivec2(x4.g, y4.g / 4));
afpvec4 v2 = image2d_ld4(bottom_blob_2d, ivec2(x4.b, y4.b / 4));
afpvec4 v3 = image2d_ld4(bottom_blob_2d, ivec2(x4.a, y4.a / 4));
afpvec4 v4 = image2d_ld4(bottom_blob_2d, ivec2(xx4.r, yy4.r / 4));
afpvec4 v5 = image2d_ld4(bottom_blob_2d, ivec2(xx4.g, yy4.g / 4));
afpvec4 v6 = image2d_ld4(bottom_blob_2d, ivec2(xx4.b, yy4.b / 4));
afpvec4 v7 = image2d_ld4(bottom_blob_2d, ivec2(xx4.a, yy4.a / 4));
afpvec4 v0 = image3d_ld4(bottom_blob_3d, ivec3(x4.r, y4.r / 4, 0));
afpvec4 v1 = image3d_ld4(bottom_blob_3d, ivec3(x4.g, y4.g / 4, 0));
afpvec4 v2 = image3d_ld4(bottom_blob_3d, ivec3(x4.b, y4.b / 4, 0));
afpvec4 v3 = image3d_ld4(bottom_blob_3d, ivec3(x4.a, y4.a / 4, 0));
afpvec4 v4 = image3d_ld4(bottom_blob_3d, ivec3(xx4.r, yy4.r / 4, 0));
afpvec4 v5 = image3d_ld4(bottom_blob_3d, ivec3(xx4.g, yy4.g / 4, 0));
afpvec4 v6 = image3d_ld4(bottom_blob_3d, ivec3(xx4.b, yy4.b / 4, 0));
afpvec4 v7 = image3d_ld4(bottom_blob_3d, ivec3(xx4.a, yy4.a / 4, 0));

v[0].r = v0[y4.r % 4];
v[0].g = v1[y4.g % 4];
@@ -133,7 +132,7 @@ void main()
v[1].a = v7[zz4.a % 4];
}

image1d_st8(top_blob, gx, v);
image3d_st8(top_blob, ivec3(gx, 0, 0), v);
#else
#if NCNN_fp16_packed
ivec4 v_offset;


+ 10
- 11
src/layer/vulkan/shader/flatten_pack8.comp View File

@@ -36,9 +36,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
#else
#if NCNN_fp16_packed
layout (binding = 0) readonly buffer bottom_blob { sfpvec2 bottom_blob_data[]; };
@@ -85,14 +84,14 @@ void main()
ivec4 yy4 = ii4 / psc(w);
ivec4 xx4 = ii4 % psc(w);

afpvec8 v0 = image2d_ld8(bottom_blob_2d, ivec2(x4.r, y4.r / 8));
afpvec8 v1 = image2d_ld8(bottom_blob_2d, ivec2(x4.g, y4.g / 8));
afpvec8 v2 = image2d_ld8(bottom_blob_2d, ivec2(x4.b, y4.b / 8));
afpvec8 v3 = image2d_ld8(bottom_blob_2d, ivec2(x4.a, y4.a / 8));
afpvec8 v4 = image2d_ld8(bottom_blob_2d, ivec2(xx4.r, yy4.r / 8));
afpvec8 v5 = image2d_ld8(bottom_blob_2d, ivec2(xx4.g, yy4.g / 8));
afpvec8 v6 = image2d_ld8(bottom_blob_2d, ivec2(xx4.b, yy4.b / 8));
afpvec8 v7 = image2d_ld8(bottom_blob_2d, ivec2(xx4.a, yy4.a / 8));
afpvec8 v0 = image3d_ld8(bottom_blob_3d, ivec3(x4.r, y4.r / 8, 0));
afpvec8 v1 = image3d_ld8(bottom_blob_3d, ivec3(x4.g, y4.g / 8, 0));
afpvec8 v2 = image3d_ld8(bottom_blob_3d, ivec3(x4.b, y4.b / 8, 0));
afpvec8 v3 = image3d_ld8(bottom_blob_3d, ivec3(x4.a, y4.a / 8, 0));
afpvec8 v4 = image3d_ld8(bottom_blob_3d, ivec3(xx4.r, yy4.r / 8, 0));
afpvec8 v5 = image3d_ld8(bottom_blob_3d, ivec3(xx4.g, yy4.g / 8, 0));
afpvec8 v6 = image3d_ld8(bottom_blob_3d, ivec3(xx4.b, yy4.b / 8, 0));
afpvec8 v7 = image3d_ld8(bottom_blob_3d, ivec3(xx4.a, yy4.a / 8, 0));

v[0].r = v0[(y4.r % 8) / 4][y4.r % 4];
v[0].g = v1[(y4.g % 8) / 4][y4.g % 4];
@@ -133,7 +132,7 @@ void main()
v[1].a = v7[(zz4.a % 8) / 4][zz4.a % 4];
}

image1d_st8(top_blob, gx, v);
image3d_st8(top_blob, ivec3(gx, 0, 0), v);
#else
#if NCNN_fp16_packed
ivec4 v_offset;


+ 2
- 25
src/layer/vulkan/shader/hardsigmoid.comp View File

@@ -61,19 +61,7 @@ void main()
return;

#if NCNN_image_shader
afp v;
if (psc(dims) == 1)
{
v = image1d_ld1(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
}
afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -83,18 +71,7 @@ void main()
v = clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f));

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st1(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st1(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st1(bottom_top_blob_data, gi, v);
#endif


+ 2
- 29
src/layer/vulkan/shader/hardsigmoid_pack4.comp View File

@@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };
@@ -61,19 +57,7 @@ void main()
return;

#if NCNN_image_shader
afpvec4 v;
if (psc(dims) == 1)
{
v = image1d_ld4(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
}
afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -83,18 +67,7 @@ void main()
v = clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f));

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st4(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st4(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st4(bottom_top_blob_data, gi, v);
#endif


+ 2
- 29
src/layer/vulkan/shader/hardsigmoid_pack8.comp View File

@@ -33,11 +33,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler1D bottom_blob_1d;
layout (binding = 0) uniform unfp sampler2D bottom_blob_2d;
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };
@@ -62,19 +58,7 @@ void main()
return;

#if NCNN_image_shader
afpvec8 v;
if (psc(dims) == 1)
{
v = image1d_ld8(bottom_blob_1d, gx);
}
else if (psc(dims) == 2)
{
v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy));
}
else // if (psc(dims) == 3)
{
v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));
}
afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));
#else
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

@@ -85,18 +69,7 @@ void main()
v[1] = clamp(v[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f));

#if NCNN_image_shader
if (psc(dims) == 1)
{
image1d_st8(top_blob_1d, gx, v);
}
else if (psc(dims) == 2)
{
image2d_st8(top_blob_2d, ivec2(gx, gy), v);
}
else // if (psc(dims) == 3)
{
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v);
}
image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v);
#else
buffer_st8(bottom_top_blob_data, gi, v);
#endif


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save