* drop bug_layout_binding_id_alias flag (tags/20201208)
| @@ -375,13 +375,13 @@ VkDeviceMemory VkAllocator::allocate_dedicated_memory(size_t size, uint32_t memo | |||
| return memory; | |||
| } | |||
| VkImage VkAllocator::create_image(VkImageType type, int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage) | |||
| VkImage VkAllocator::create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage) | |||
| { | |||
| VkImageCreateInfo imageCreateInfo; | |||
| imageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | |||
| imageCreateInfo.pNext = 0; | |||
| imageCreateInfo.flags = 0; | |||
| imageCreateInfo.imageType = type; | |||
| imageCreateInfo.imageType = VK_IMAGE_TYPE_3D; | |||
| imageCreateInfo.format = format; | |||
| imageCreateInfo.extent.width = width; | |||
| imageCreateInfo.extent.height = height; | |||
| @@ -400,21 +400,21 @@ VkImage VkAllocator::create_image(VkImageType type, int width, int height, int d | |||
| VkResult ret = vkCreateImage(vkdev->vkdevice(), &imageCreateInfo, 0, &image); | |||
| if (ret != VK_SUCCESS) | |||
| { | |||
| NCNN_LOGE("vkCreateImage failed %d %d %d %d %d %d %d %d", ret, type, width, height, depth, format, tiling, usage); | |||
| NCNN_LOGE("vkCreateImage failed %d %d %d %d %d %d %d", ret, width, height, depth, format, tiling, usage); | |||
| return 0; | |||
| } | |||
| return image; | |||
| } | |||
| VkImageView VkAllocator::create_imageview(VkImageViewType type, VkImage image, VkFormat format) | |||
| VkImageView VkAllocator::create_imageview(VkImage image, VkFormat format) | |||
| { | |||
| VkImageViewCreateInfo imageViewCreateInfo; | |||
| imageViewCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; | |||
| imageViewCreateInfo.pNext = 0; | |||
| imageViewCreateInfo.flags = 0; | |||
| imageViewCreateInfo.image = image; | |||
| imageViewCreateInfo.viewType = type; | |||
| imageViewCreateInfo.viewType = VK_IMAGE_VIEW_TYPE_3D; | |||
| imageViewCreateInfo.format = format; | |||
| imageViewCreateInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; | |||
| imageViewCreateInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; | |||
| @@ -709,7 +709,7 @@ void VkBlobAllocator::fastFree(VkBufferMemory* ptr) | |||
| delete ptr; | |||
| } | |||
| VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack) | |||
| VkImageMemory* VkBlobAllocator::fastMalloc(int w, int h, int c, size_t elemsize, int elempack) | |||
| { | |||
| if (elempack != 1 && elempack != 4 && elempack != 8) | |||
| { | |||
| @@ -743,48 +743,16 @@ VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t | |||
| // large elempack spills on image w | |||
| if (elempack == 8) width *= 2; | |||
| VkImageType image_type; | |||
| VkImageViewType imageview_type; | |||
| if (dims == 1) | |||
| if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d) | |||
| { | |||
| image_type = VK_IMAGE_TYPE_1D; | |||
| imageview_type = VK_IMAGE_VIEW_TYPE_1D; | |||
| if (width > (int)vkdev->info.max_image_dimension_1d) | |||
| { | |||
| NCNN_LOGE("image dimension too large %d > %d", width, (int)vkdev->info.max_image_dimension_1d); | |||
| return 0; | |||
| } | |||
| } | |||
| else if (dims == 2) | |||
| { | |||
| image_type = VK_IMAGE_TYPE_2D; | |||
| imageview_type = VK_IMAGE_VIEW_TYPE_2D; | |||
| if (width > (int)vkdev->info.max_image_dimension_2d || height > (int)vkdev->info.max_image_dimension_2d) | |||
| { | |||
| NCNN_LOGE("image dimension too large %d %d > %d", width, height, (int)vkdev->info.max_image_dimension_2d); | |||
| return 0; | |||
| } | |||
| } | |||
| else // if (dims == 3) | |||
| { | |||
| image_type = VK_IMAGE_TYPE_3D; | |||
| imageview_type = VK_IMAGE_VIEW_TYPE_3D; | |||
| if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d) | |||
| { | |||
| NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d); | |||
| return 0; | |||
| } | |||
| NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d); | |||
| return 0; | |||
| } | |||
| VkImageMemory* ptr = new VkImageMemory; | |||
| ptr->image = create_image(image_type, width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); | |||
| ptr->image = create_image(width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); | |||
| ptr->image_type = image_type; | |||
| ptr->imageview_type = imageview_type; | |||
| ptr->width = width; | |||
| ptr->height = height; | |||
| ptr->depth = depth; | |||
| @@ -827,7 +795,7 @@ VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t | |||
| // do not allow host access to optimal tiling image | |||
| ptr->mapped_ptr = 0; | |||
| ptr->imageview = create_imageview(imageview_type, ptr->image, format); | |||
| ptr->imageview = create_imageview(ptr->image, format); | |||
| ptr->access_flags = 0; | |||
| ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED; | |||
| @@ -894,7 +862,7 @@ VkImageMemory* VkBlobAllocator::fastMalloc(int dims, int w, int h, int c, size_t | |||
| // do not allow host access to optimal tiling image | |||
| ptr->mapped_ptr = 0; | |||
| ptr->imageview = create_imageview(imageview_type, ptr->image, format); | |||
| ptr->imageview = create_imageview(ptr->image, format); | |||
| ptr->access_flags = 0; | |||
| ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED; | |||
| @@ -1244,7 +1212,7 @@ void VkWeightAllocator::fastFree(VkBufferMemory* ptr) | |||
| delete ptr; | |||
| } | |||
| VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack) | |||
| VkImageMemory* VkWeightAllocator::fastMalloc(int w, int h, int c, size_t elemsize, int elempack) | |||
| { | |||
| if (elempack != 1 && elempack != 4 && elempack != 8 && elempack != 16 && elempack != 32 && elempack != 64) | |||
| { | |||
| @@ -1287,48 +1255,16 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size | |||
| if (elempack == 32) width *= 8; | |||
| if (elempack == 64) width *= 16; | |||
| VkImageType image_type; | |||
| VkImageViewType imageview_type; | |||
| if (dims == 1) | |||
| if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d) | |||
| { | |||
| image_type = VK_IMAGE_TYPE_1D; | |||
| imageview_type = VK_IMAGE_VIEW_TYPE_1D; | |||
| if (width > (int)vkdev->info.max_image_dimension_1d) | |||
| { | |||
| NCNN_LOGE("image dimension too large %d > %d", width, (int)vkdev->info.max_image_dimension_1d); | |||
| return 0; | |||
| } | |||
| } | |||
| else if (dims == 2) | |||
| { | |||
| image_type = VK_IMAGE_TYPE_2D; | |||
| imageview_type = VK_IMAGE_VIEW_TYPE_2D; | |||
| if (width > (int)vkdev->info.max_image_dimension_2d || height > (int)vkdev->info.max_image_dimension_2d) | |||
| { | |||
| NCNN_LOGE("image dimension too large %d %d > %d", width, height, (int)vkdev->info.max_image_dimension_2d); | |||
| return 0; | |||
| } | |||
| } | |||
| else // if (dims == 3) | |||
| { | |||
| image_type = VK_IMAGE_TYPE_3D; | |||
| imageview_type = VK_IMAGE_VIEW_TYPE_3D; | |||
| if (width > (int)vkdev->info.max_image_dimension_3d || height > (int)vkdev->info.max_image_dimension_3d || depth > (int)vkdev->info.max_image_dimension_3d) | |||
| { | |||
| NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d); | |||
| return 0; | |||
| } | |||
| NCNN_LOGE("image dimension too large %d %d %d > %d", width, height, depth, (int)vkdev->info.max_image_dimension_3d); | |||
| return 0; | |||
| } | |||
| VkImageMemory* ptr = new VkImageMemory; | |||
| ptr->image = create_image(image_type, width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); | |||
| ptr->image = create_image(width, height, depth, format, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); | |||
| ptr->image_type = image_type; | |||
| ptr->imageview_type = imageview_type; | |||
| ptr->width = width; | |||
| ptr->height = height; | |||
| ptr->depth = depth; | |||
| @@ -1385,7 +1321,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size | |||
| // do not allow host access to optimal tiling image | |||
| ptr->mapped_ptr = 0; | |||
| ptr->imageview = create_imageview(imageview_type, ptr->image, format); | |||
| ptr->imageview = create_imageview(ptr->image, format); | |||
| ptr->access_flags = 0; | |||
| ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED; | |||
| @@ -1426,7 +1362,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size | |||
| // do not allow host access to optimal tiling image | |||
| ptr->mapped_ptr = 0; | |||
| ptr->imageview = create_imageview(imageview_type, ptr->image, format); | |||
| ptr->imageview = create_imageview(ptr->image, format); | |||
| ptr->access_flags = 0; | |||
| ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED; | |||
| @@ -1482,7 +1418,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int dims, int w, int h, int c, size | |||
| // do not allow host access to optimal tiling image | |||
| ptr->mapped_ptr = 0; | |||
| ptr->imageview = create_imageview(imageview_type, ptr->image, format); | |||
| ptr->imageview = create_imageview(ptr->image, format); | |||
| ptr->access_flags = 0; | |||
| ptr->image_layout = VK_IMAGE_LAYOUT_UNDEFINED; | |||
| @@ -1612,36 +1548,16 @@ void VkStagingAllocator::fastFree(VkBufferMemory* ptr) | |||
| buffer_budgets.push_back(ptr); | |||
| } | |||
| VkImageMemory* VkStagingAllocator::fastMalloc(int dims, int w, int h, int c, size_t elemsize, int /* elempack */) | |||
| VkImageMemory* VkStagingAllocator::fastMalloc(int w, int h, int c, size_t elemsize, int /* elempack */) | |||
| { | |||
| // staging image is mainly used for storing small piece of dynamic parameters | |||
| // we allocate host memory as a fake image, it's simple and good | |||
| const size_t size = w * h * c * elemsize; | |||
| VkImageType image_type; | |||
| VkImageViewType imageview_type; | |||
| if (dims == 1) | |||
| { | |||
| image_type = VK_IMAGE_TYPE_1D; | |||
| imageview_type = VK_IMAGE_VIEW_TYPE_1D; | |||
| } | |||
| else if (dims == 2) | |||
| { | |||
| image_type = VK_IMAGE_TYPE_2D; | |||
| imageview_type = VK_IMAGE_VIEW_TYPE_2D; | |||
| } | |||
| else // if (dims == 3) | |||
| { | |||
| image_type = VK_IMAGE_TYPE_3D; | |||
| imageview_type = VK_IMAGE_VIEW_TYPE_3D; | |||
| } | |||
| VkImageMemory* ptr = new VkImageMemory; | |||
| ptr->image = 0; | |||
| ptr->image_type = image_type; | |||
| ptr->imageview_type = imageview_type; | |||
| ptr->width = w; | |||
| ptr->height = h; | |||
| ptr->depth = c; | |||
| @@ -1746,7 +1662,7 @@ VkAndroidHardwareBufferImageAllocator::~VkAndroidHardwareBufferImageAllocator() | |||
| } | |||
| } | |||
| VkImageMemory* VkAndroidHardwareBufferImageAllocator::fastMalloc(int /*dims*/, int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/) | |||
| VkImageMemory* VkAndroidHardwareBufferImageAllocator::fastMalloc(int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/) | |||
| { | |||
| VkResult ret; | |||
| @@ -224,8 +224,6 @@ public: | |||
| VkImageView imageview; | |||
| // underlying info assigned by allocator | |||
| VkImageType image_type; | |||
| VkImageViewType imageview_type; | |||
| int width; | |||
| int height; | |||
| int depth; | |||
| @@ -267,7 +265,7 @@ public: | |||
| virtual int flush(VkBufferMemory* ptr); | |||
| virtual int invalidate(VkBufferMemory* ptr); | |||
| virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack) = 0; | |||
| virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack) = 0; | |||
| virtual void fastFree(VkImageMemory* ptr) = 0; | |||
| public: | |||
| @@ -282,8 +280,8 @@ protected: | |||
| VkDeviceMemory allocate_memory(size_t size, uint32_t memory_type_index); | |||
| VkDeviceMemory allocate_dedicated_memory(size_t size, uint32_t memory_type_index, VkImage image, VkBuffer buffer); | |||
| VkImage create_image(VkImageType type, int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage); | |||
| VkImageView create_imageview(VkImageViewType type, VkImage image, VkFormat format); | |||
| VkImage create_image(int width, int height, int depth, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage); | |||
| VkImageView create_imageview(VkImage image, VkFormat format); | |||
| }; | |||
| class VkBlobAllocator : public VkAllocator | |||
| @@ -298,7 +296,7 @@ public: | |||
| virtual VkBufferMemory* fastMalloc(size_t size); | |||
| virtual void fastFree(VkBufferMemory* ptr); | |||
| virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack); | |||
| virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); | |||
| virtual void fastFree(VkImageMemory* ptr); | |||
| protected: | |||
| @@ -324,7 +322,7 @@ public: | |||
| public: | |||
| virtual VkBufferMemory* fastMalloc(size_t size); | |||
| virtual void fastFree(VkBufferMemory* ptr); | |||
| virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack); | |||
| virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); | |||
| virtual void fastFree(VkImageMemory* ptr); | |||
| protected: | |||
| @@ -355,7 +353,7 @@ public: | |||
| virtual VkBufferMemory* fastMalloc(size_t size); | |||
| virtual void fastFree(VkBufferMemory* ptr); | |||
| virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack); | |||
| virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); | |||
| virtual void fastFree(VkImageMemory* ptr); | |||
| protected: | |||
| @@ -372,7 +370,7 @@ public: | |||
| public: | |||
| virtual VkBufferMemory* fastMalloc(size_t size); | |||
| virtual void fastFree(VkBufferMemory* ptr); | |||
| virtual VkImageMemory* fastMalloc(int /*dims*/, int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/) | |||
| virtual VkImageMemory* fastMalloc(int /*w*/, int /*h*/, int /*c*/, size_t /*elemsize*/, int /*elempack*/) | |||
| { | |||
| return 0; | |||
| } | |||
| @@ -392,7 +390,7 @@ public: | |||
| virtual ~VkAndroidHardwareBufferImageAllocator(); | |||
| public: | |||
| virtual VkImageMemory* fastMalloc(int dims, int w, int h, int c, size_t elemsize, int elempack); | |||
| virtual VkImageMemory* fastMalloc(int w, int h, int c, size_t elemsize, int elempack); | |||
| virtual void fastFree(VkImageMemory* ptr); | |||
| virtual VkBufferMemory* fastMalloc(size_t /*size*/) | |||
| { | |||
| @@ -641,16 +641,9 @@ int create_gpu_instance() | |||
| // 650 = 0x5143 0x6050002 | |||
| gpu_info.bug_storage_buffer_no_l1 = false; | |||
| gpu_info.bug_layout_binding_id_alias = false; | |||
| gpu_info.bug_corrupted_online_pipeline_cache = false; | |||
| gpu_info.bug_implicit_fp16_arithmetic = false; | |||
| if (physicalDeviceProperties.vendorID == 0x5143 && physicalDeviceProperties.apiVersion < VK_MAKE_VERSION(1, 0, 49)) | |||
| { | |||
| // qcom adreno with old buggy driver cannot handle binding id alias | |||
| gpu_info.bug_layout_binding_id_alias = true; | |||
| } | |||
| if (physicalDeviceProperties.vendorID == 0x5143 && physicalDeviceProperties.apiVersion < VK_MAKE_VERSION(1, 0, 66)) | |||
| { | |||
| // qcom adreno with old buggy driver cannot share created pipeline properly | |||
| @@ -668,22 +661,6 @@ int create_gpu_instance() | |||
| // gpu_info.bug_storage_buffer_no_l1 = true; | |||
| } | |||
| if (physicalDeviceProperties.vendorID == 0x13b5 | |||
| && (physicalDeviceProperties.deviceID == 0x7500001 | |||
| || physicalDeviceProperties.deviceID == 0x8602000 | |||
| || physicalDeviceProperties.deviceID == 0x8800020)) | |||
| { | |||
| // these arm mali midgard era driver cannot handle binding id alias | |||
| gpu_info.bug_layout_binding_id_alias = true; | |||
| } | |||
| #if __APPLE__ | |||
| { | |||
| // metal shader never accept binding id alias | |||
| gpu_info.bug_layout_binding_id_alias = true; | |||
| } | |||
| #endif | |||
| if (physicalDeviceProperties.vendorID == 0x13b5 | |||
| && (physicalDeviceProperties.deviceID == 0x7500001 | |||
| || physicalDeviceProperties.deviceID == 0x8602000 | |||
| @@ -997,8 +974,8 @@ int create_gpu_instance() | |||
| gpu_info.graphics_queue_family_index, gpu_info.graphics_queue_count, | |||
| gpu_info.transfer_queue_family_index, gpu_info.transfer_queue_count); | |||
| NCNN_LOGE("[%u %s] bugsbn1=%d buglbia=%d bugcopc=%d bugihfa=%d", i, physicalDeviceProperties.deviceName, | |||
| gpu_info.bug_storage_buffer_no_l1, gpu_info.bug_layout_binding_id_alias, gpu_info.bug_corrupted_online_pipeline_cache, gpu_info.bug_implicit_fp16_arithmetic); | |||
| NCNN_LOGE("[%u %s] bugsbn1=%d bugcopc=%d bugihfa=%d", i, physicalDeviceProperties.deviceName, | |||
| gpu_info.bug_storage_buffer_no_l1, gpu_info.bug_corrupted_online_pipeline_cache, gpu_info.bug_implicit_fp16_arithmetic); | |||
| NCNN_LOGE("[%u %s] fp16p=%d fp16s=%d fp16a=%d int8s=%d int8a=%d", i, physicalDeviceProperties.deviceName, | |||
| gpu_info.support_fp16_packed, gpu_info.support_fp16_storage, gpu_info.support_fp16_arithmetic, | |||
| @@ -2031,12 +2008,6 @@ void VulkanDevice::convert_packing(const VkMat& src, VkMat& dst, int dst_elempac | |||
| void VulkanDevice::convert_packing(const VkImageMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const | |||
| { | |||
| if (info.bug_layout_binding_id_alias) | |||
| { | |||
| NCNN_LOGE("cannot convert_packing i2i"); | |||
| return; | |||
| } | |||
| int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0; | |||
| int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2; | |||
| @@ -2069,12 +2040,6 @@ void VulkanDevice::convert_packing(const VkImageMat& src, VkImageMat& dst, int d | |||
| void VulkanDevice::convert_packing(const VkMat& src, VkImageMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const | |||
| { | |||
| if (info.bug_layout_binding_id_alias) | |||
| { | |||
| NCNN_LOGE("cannot convert_packing b2i"); | |||
| return; | |||
| } | |||
| int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0; | |||
| int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2; | |||
| @@ -2107,12 +2072,6 @@ void VulkanDevice::convert_packing(const VkMat& src, VkImageMat& dst, int dst_el | |||
| void VulkanDevice::convert_packing(const VkImageMat& src, VkMat& dst, int dst_elempack, VkCompute& cmd, const Option& opt) const | |||
| { | |||
| if (info.bug_layout_binding_id_alias) | |||
| { | |||
| NCNN_LOGE("cannot convert_packing i2b"); | |||
| return; | |||
| } | |||
| int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0; | |||
| int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2; | |||
| @@ -2359,12 +2318,6 @@ const ncnn::Packing_vulkan* VulkanDevice::get_utility_operator(int storage_type_ | |||
| opt.use_fp16_packed = (cast_type_from_index == 1 || cast_type_to_index == 1); | |||
| opt.use_fp16_storage = (cast_type_from_index == 2 || cast_type_to_index == 2); | |||
| if (info.bug_layout_binding_id_alias && opt.use_image_storage) | |||
| { | |||
| NCNN_LOGE("cannot create uop with use_image_storage if bug_layout_binding_id_alias"); | |||
| return 0; | |||
| } | |||
| if (!info.support_fp16_packed && opt.use_fp16_packed) | |||
| { | |||
| NCNN_LOGE("cannot create uop with use_fp16_packed if not support_fp16_packed"); | |||
| @@ -2424,8 +2377,6 @@ void VulkanDevice::destroy_utility_operator() | |||
| for (int i1 = 0; i1 < 2; i1++) | |||
| { | |||
| opt.use_image_storage = (i0 == 1 || i1 == 1); | |||
| if (info.bug_layout_binding_id_alias && opt.use_image_storage) | |||
| continue; | |||
| // from fp32-b/i | fp16p-b/i | fp16s-b/i | |||
| // to fp32-b/i | fp16p-b/i | fp16s-b/i | |||
| @@ -129,7 +129,6 @@ public: | |||
| // bug is not feature | |||
| bool bug_storage_buffer_no_l1; | |||
| bool bug_layout_binding_id_alias; | |||
| bool bug_corrupted_online_pipeline_cache; | |||
| // but sometimes bug is a feature | |||
| @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| @@ -58,19 +54,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afp v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld1(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -80,18 +64,7 @@ void main() | |||
| v = abs(v); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st1(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| @@ -58,19 +54,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec4 v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld4(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -80,18 +64,7 @@ void main() | |||
| v = abs(v); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -30,11 +30,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| @@ -59,19 +55,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec8 v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld8(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -82,18 +66,7 @@ void main() | |||
| v[1] = abs(v[1]); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st8(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -29,14 +29,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| layout (binding = 2) uniform unfp sampler1D a; | |||
| layout (binding = 3) uniform unfp sampler1D b; | |||
| layout (binding = 2) uniform unfp sampler3D a; | |||
| layout (binding = 3) uniform unfp sampler3D b; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| layout (binding = 1) readonly buffer a { sfp a_data[]; }; | |||
| @@ -62,19 +58,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afp v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld1(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -85,8 +69,8 @@ void main() | |||
| const int abi = gxyz[psc(dims) - 1]; | |||
| #if NCNN_image_shader | |||
| afp b = image1d_ld1(b, abi); | |||
| afp a = image1d_ld1(a, abi); | |||
| afp b = image3d_ld1(b, ivec3(abi, 0, 0)); | |||
| afp a = image3d_ld1(a, ivec3(abi, 0, 0)); | |||
| #else | |||
| afp b = buffer_ld1(b_data, abi); | |||
| afp a = buffer_ld1(a_data, abi); | |||
| @@ -95,18 +79,7 @@ void main() | |||
| v = b * v + a; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st1(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -29,14 +29,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| layout (binding = 2) uniform unfp sampler1D a; | |||
| layout (binding = 3) uniform unfp sampler1D b; | |||
| layout (binding = 2) uniform unfp sampler3D a; | |||
| layout (binding = 3) uniform unfp sampler3D b; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| layout (binding = 1) readonly buffer a { sfpvec4 a_data[]; }; | |||
| @@ -62,19 +58,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec4 v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld4(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -85,8 +69,8 @@ void main() | |||
| const int abi = gxyz[psc(dims) - 1]; | |||
| #if NCNN_image_shader | |||
| afpvec4 b = image1d_ld4(b, abi); | |||
| afpvec4 a = image1d_ld4(a, abi); | |||
| afpvec4 b = image3d_ld4(b, ivec3(abi, 0, 0)); | |||
| afpvec4 a = image3d_ld4(a, ivec3(abi, 0, 0)); | |||
| #else | |||
| afpvec4 b = buffer_ld4(b_data, abi); | |||
| afpvec4 a = buffer_ld4(a_data, abi); | |||
| @@ -95,18 +79,7 @@ void main() | |||
| v = b * v + a; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -30,14 +30,10 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| layout (binding = 2) uniform unfp sampler1D a; | |||
| layout (binding = 3) uniform unfp sampler1D b; | |||
| layout (binding = 2) uniform unfp sampler3D a; | |||
| layout (binding = 3) uniform unfp sampler3D b; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| layout (binding = 1) readonly buffer a { sfpvec8 a_data[]; }; | |||
| @@ -63,19 +59,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec8 v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld8(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -86,8 +70,8 @@ void main() | |||
| const int abi = gxyz[psc(dims) - 1]; | |||
| #if NCNN_image_shader | |||
| afpvec8 b = image1d_ld8(b, abi); | |||
| afpvec8 a = image1d_ld8(a, abi); | |||
| afpvec8 b = image3d_ld8(b, ivec3(abi, 0, 0)); | |||
| afpvec8 a = image3d_ld8(a, ivec3(abi, 0, 0)); | |||
| #else | |||
| afpvec8 b = buffer_ld8(b_data, abi); | |||
| afpvec8 a = buffer_ld8(a_data, abi); | |||
| @@ -97,18 +81,7 @@ void main() | |||
| v[1] = b[1] * v[1] + a[1]; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st8(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -45,14 +45,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D a_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D a_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D a_blob_3d; | |||
| layout (binding = 1) uniform unfp sampler1D b_blob_1d; | |||
| layout (binding = 1) uniform unfp sampler2D b_blob_2d; | |||
| layout (binding = 1) uniform unfp sampler3D b_blob_3d; | |||
| layout (binding = 2, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer a_blob { sfp a_blob_data[]; }; | |||
| @@ -91,20 +85,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afp v1; | |||
| if (psc(outdims) == 1) | |||
| { | |||
| v1 = image1d_ld1(a_blob_1d, gx); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| v1 = image2d_ld1(a_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afp v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -129,18 +110,7 @@ void main() | |||
| if (op_type == 8) res = b / v1; | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st1(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st1(a_blob_data, gi, res); | |||
| #endif | |||
| @@ -149,20 +119,7 @@ void main() | |||
| { | |||
| // type 7 13 19 | |||
| #if NCNN_image_shader | |||
| afp v2; | |||
| if (psc(outdims) == 1) | |||
| { | |||
| v2 = image1d_ld1(b_blob_1d, gx); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| v2 = image2d_ld1(b_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afp v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| afp v2 = buffer_ld1(b_blob_data, gi); | |||
| #endif | |||
| @@ -178,18 +135,7 @@ void main() | |||
| if (op_type == 8) res = v2 / v1; | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st1(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st1(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -43,14 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D a_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D a_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D a_blob_3d; | |||
| layout (binding = 1) uniform unfp sampler1D b_blob_1d; | |||
| layout (binding = 1) uniform unfp sampler2D b_blob_2d; | |||
| layout (binding = 1) uniform unfp sampler3D b_blob_3d; | |||
| layout (binding = 2, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer a_blob { sfp a_blob_data[]; }; | |||
| @@ -89,8 +83,12 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afp v1; | |||
| afp v2; | |||
| int ax = gx; | |||
| int ay = gy; | |||
| int az = gz; | |||
| int bx = gx; | |||
| int by = gy; | |||
| int bz = gz; | |||
| if (psc(adims) == 3) | |||
| { | |||
| @@ -99,37 +97,36 @@ void main() | |||
| if (psc(bw) == 1 && psc(bh) == 1) | |||
| { | |||
| // special type 1 | |||
| v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image3d_ld1(b_blob_3d, ivec3(0, 0, gz)); | |||
| bx = 0; | |||
| by = 0; | |||
| } | |||
| if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1) | |||
| { | |||
| // special type 2 | |||
| v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, 0)); | |||
| bz = 0; | |||
| } | |||
| if (psc(aw) == 1 && psc(ah) == 1) | |||
| { | |||
| // special type 3 | |||
| v1 = image3d_ld1(a_blob_3d, ivec3(0, 0, gz)); | |||
| v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); | |||
| ax = 0; | |||
| ay = 0; | |||
| } | |||
| if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1) | |||
| { | |||
| // special type 4 | |||
| v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, 0)); | |||
| v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); | |||
| az = 0; | |||
| } | |||
| } | |||
| if (psc(bdims) == 2) | |||
| { | |||
| // type 18 | |||
| v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image2d_ld1(b_blob_2d, ivec2(gy, gz)); | |||
| bx = gy; | |||
| by = gz; | |||
| bz = 0; | |||
| } | |||
| if (psc(bdims) == 1) | |||
| @@ -137,14 +134,16 @@ void main() | |||
| if (psc(bw) == 1) | |||
| { | |||
| // type 16 | |||
| v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image1d_ld1(b_blob_1d, 0); | |||
| bx = 0; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| else | |||
| { | |||
| // type 17 | |||
| v1 = image3d_ld1(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image1d_ld1(b_blob_1d, gz); | |||
| bx = gz; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| } | |||
| } | |||
| @@ -153,8 +152,9 @@ void main() | |||
| if (psc(bdims) == 3) | |||
| { | |||
| // type 14 | |||
| v1 = image2d_ld1(a_blob_2d, ivec2(gy, gz)); | |||
| v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); | |||
| ax = gy; | |||
| ay = gz; | |||
| az = 0; | |||
| } | |||
| if (psc(bdims) == 1) | |||
| @@ -162,67 +162,61 @@ void main() | |||
| if (psc(bw) == 1) | |||
| { | |||
| // type 11 | |||
| v1 = image2d_ld1(a_blob_2d, ivec2(gx, gy)); | |||
| v2 = image1d_ld1(b_blob_1d, 0); | |||
| bx = 0; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| else | |||
| { | |||
| // type 12 | |||
| v1 = image2d_ld1(a_blob_2d, ivec2(gx, gy)); | |||
| v2 = image1d_ld1(b_blob_1d, gy); | |||
| bx = gy; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| } | |||
| } | |||
| else if (psc(adims) == 1) | |||
| { | |||
| if (psc(bdims) == 3) | |||
| if (psc(aw) == 1) | |||
| { | |||
| if (psc(aw) == 1) | |||
| { | |||
| // type 4 | |||
| v1 = image1d_ld1(a_blob_1d, 0); | |||
| v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| else | |||
| { | |||
| // type 9 | |||
| v1 = image1d_ld1(a_blob_1d, gz); | |||
| v2 = image3d_ld1(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| // type 2 3 4 | |||
| ax = 0; | |||
| ay = 0; | |||
| az = 0; | |||
| } | |||
| if (psc(bdims) == 2) | |||
| else | |||
| { | |||
| if (psc(aw) == 1) | |||
| if (psc(bdims) == 3) | |||
| { | |||
| // type 3 | |||
| v1 = image1d_ld1(a_blob_1d, 0); | |||
| v2 = image2d_ld1(b_blob_2d, ivec2(gx, gy)); | |||
| // type 9 | |||
| ax = gz; | |||
| ay = 0; | |||
| az = 0; | |||
| } | |||
| else | |||
| if (psc(bdims) == 2) | |||
| { | |||
| // type 8 | |||
| v1 = image1d_ld1(a_blob_1d, gy); | |||
| v2 = image2d_ld1(b_blob_2d, ivec2(gx, gy)); | |||
| ax = gy; | |||
| ay = 0; | |||
| az = 0; | |||
| } | |||
| } | |||
| if (psc(bdims) == 1) | |||
| { | |||
| if (psc(aw) == 1) | |||
| { | |||
| // type 2 | |||
| v1 = image1d_ld1(a_blob_1d, 0); | |||
| v2 = image1d_ld1(b_blob_1d, gx); | |||
| } | |||
| else // if (psc(bw) == 1) | |||
| if (psc(bdims) == 1) | |||
| { | |||
| // type 6 | |||
| v1 = image1d_ld1(a_blob_1d, gx); | |||
| v2 = image1d_ld1(b_blob_1d, 0); | |||
| if (psc(bw) == 1) | |||
| { | |||
| // type 6 | |||
| bx = 0; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| afp v1 = image3d_ld1(a_blob_3d, ivec3(ax, ay, az)); | |||
| afp v2 = image3d_ld1(b_blob_3d, ivec3(bx, by, bz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -363,18 +357,7 @@ void main() | |||
| if (op_type == 8) res = v2 / v1; | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st1(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st1(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -43,13 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D a_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler3D a_blob_3d; | |||
| layout (binding = 1) uniform unfp sampler1D b_blob_1d; | |||
| layout (binding = 1) uniform unfp sampler2D b_blob_2d; | |||
| layout (binding = 1) uniform unfp sampler3D b_blob_3d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer a_blob { sfp a_blob_data[]; }; | |||
| @@ -89,7 +84,6 @@ void main() | |||
| #if NCNN_image_shader | |||
| afpvec4 v1; | |||
| afpvec4 v2; | |||
| if (psc(adims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1) | |||
| { | |||
| @@ -98,21 +92,10 @@ void main() | |||
| } | |||
| else | |||
| { | |||
| v1 = afpvec4(image1d_ld1(a_blob_1d, 0)); | |||
| v1 = afpvec4(image3d_ld1(a_blob_3d, ivec3(0, 0, 0))); | |||
| } | |||
| if (psc(bdims) == 1) | |||
| { | |||
| v2 = image1d_ld4(b_blob_1d, gx); | |||
| } | |||
| else if (psc(bdims) == 2) | |||
| { | |||
| v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(bdims) == 3) | |||
| { | |||
| v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec4 v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -142,18 +125,7 @@ void main() | |||
| if (op_type == 8) res = v2 / v1; | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st4(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -44,13 +44,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D a_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler3D a_blob_3d; | |||
| layout (binding = 1) uniform unfp sampler1D b_blob_1d; | |||
| layout (binding = 1) uniform unfp sampler2D b_blob_2d; | |||
| layout (binding = 1) uniform unfp sampler3D b_blob_3d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer a_blob { sfp a_blob_data[]; }; | |||
| @@ -90,7 +85,6 @@ void main() | |||
| #if NCNN_image_shader | |||
| afpvec4 v1; | |||
| afpvec8 v2; | |||
| if (psc(adims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1) | |||
| { | |||
| @@ -99,21 +93,10 @@ void main() | |||
| } | |||
| else | |||
| { | |||
| v1 = afpvec4(image1d_ld1(a_blob_1d, 0)); | |||
| v1 = afpvec4(image3d_ld1(a_blob_3d, ivec3(0, 0, 0))); | |||
| } | |||
| if (psc(bdims) == 1) | |||
| { | |||
| v2 = image1d_ld8(b_blob_1d, gx); | |||
| } | |||
| else if (psc(bdims) == 2) | |||
| { | |||
| v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(bdims) == 3) | |||
| { | |||
| v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec8 v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -179,18 +162,7 @@ void main() | |||
| } | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st8(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -43,13 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D a_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D a_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D a_blob_3d; | |||
| layout (binding = 1) uniform unfp sampler1D b_blob_1d; | |||
| layout (binding = 1) uniform unfp sampler3D b_blob_3d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer a_blob { sfpvec4 a_blob_data[]; }; | |||
| @@ -88,7 +83,6 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec4 v1; | |||
| afpvec4 v2; | |||
| if (psc(bdims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1) | |||
| @@ -98,21 +92,10 @@ void main() | |||
| } | |||
| else | |||
| { | |||
| v2 = afpvec4(image1d_ld1(b_blob_1d, 0)); | |||
| v2 = afpvec4(image3d_ld1(b_blob_3d, ivec3(0, 0, 0))); | |||
| } | |||
| if (psc(adims) == 1) | |||
| { | |||
| v1 = image1d_ld4(a_blob_1d, gx); | |||
| } | |||
| else if (psc(adims) == 2) | |||
| { | |||
| v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(adims) == 3) | |||
| { | |||
| v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec4 v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -142,18 +125,7 @@ void main() | |||
| if (op_type == 8) res = v2 / v1; | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st4(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -44,13 +44,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D a_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D a_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D a_blob_3d; | |||
| layout (binding = 1) uniform unfp sampler1D b_blob_1d; | |||
| layout (binding = 1) uniform unfp sampler3D b_blob_3d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer a_blob { sfpvec8 a_blob_data[]; }; | |||
| @@ -89,7 +84,6 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec8 v1; | |||
| afpvec4 v2; | |||
| if (psc(bdims) == 3 && psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1) | |||
| @@ -99,21 +93,10 @@ void main() | |||
| } | |||
| else | |||
| { | |||
| v2 = afpvec4(image1d_ld1(b_blob_1d, 0)); | |||
| v2 = afpvec4(image3d_ld1(b_blob_3d, ivec3(0, 0, 0))); | |||
| } | |||
| if (psc(adims) == 1) | |||
| { | |||
| v1 = image1d_ld8(a_blob_1d, gx); | |||
| } | |||
| else if (psc(adims) == 2) | |||
| { | |||
| v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(adims) == 3) | |||
| { | |||
| v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec8 v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -179,18 +162,7 @@ void main() | |||
| } | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st8(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -43,14 +43,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D a_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D a_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D a_blob_3d; | |||
| layout (binding = 1) uniform unfp sampler1D b_blob_1d; | |||
| layout (binding = 1) uniform unfp sampler2D b_blob_2d; | |||
| layout (binding = 1) uniform unfp sampler3D b_blob_3d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer a_blob { sfpvec4 a_blob_data[]; }; | |||
| @@ -89,8 +83,12 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec4 v1; | |||
| afpvec4 v2; | |||
| int ax = gx; | |||
| int ay = gy; | |||
| int az = gz; | |||
| int bx = gx; | |||
| int by = gy; | |||
| int bz = gz; | |||
| if (psc(adims) == 3) | |||
| { | |||
| @@ -99,37 +97,36 @@ void main() | |||
| if (psc(bw) == 1 && psc(bh) == 1) | |||
| { | |||
| // special type 1 | |||
| v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image3d_ld4(b_blob_3d, ivec3(0, 0, gz)); | |||
| bx = 0; | |||
| by = 0; | |||
| } | |||
| if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1) | |||
| { | |||
| // special type 2 | |||
| v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, 0)); | |||
| bz = 0; | |||
| } | |||
| if (psc(aw) == 1 && psc(ah) == 1) | |||
| { | |||
| // special type 3 | |||
| v1 = image3d_ld4(a_blob_3d, ivec3(0, 0, gz)); | |||
| v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); | |||
| ax = 0; | |||
| ay = 0; | |||
| } | |||
| if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1) | |||
| { | |||
| // special type 4 | |||
| v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, 0)); | |||
| v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); | |||
| az = 0; | |||
| } | |||
| } | |||
| if (psc(bdims) == 2) | |||
| { | |||
| // type 18 | |||
| v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image2d_ld4(b_blob_2d, ivec2(gy, gz)); | |||
| bx = gy; | |||
| by = gz; | |||
| bz = 0; | |||
| } | |||
| if (psc(bdims) == 1) | |||
| @@ -137,14 +134,16 @@ void main() | |||
| if (psc(bw) == 1) | |||
| { | |||
| // type 16 | |||
| v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image1d_ld4(b_blob_1d, 0); | |||
| bx = 0; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| else | |||
| { | |||
| // type 17 | |||
| v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image1d_ld4(b_blob_1d, gz); | |||
| bx = gz; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| } | |||
| } | |||
| @@ -153,8 +152,9 @@ void main() | |||
| if (psc(bdims) == 3) | |||
| { | |||
| // type 14 | |||
| v1 = image2d_ld4(a_blob_2d, ivec2(gy, gz)); | |||
| v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); | |||
| ax = gy; | |||
| ay = gz; | |||
| az = 0; | |||
| } | |||
| if (psc(bdims) == 1) | |||
| @@ -162,67 +162,61 @@ void main() | |||
| if (psc(bw) == 1) | |||
| { | |||
| // type 11 | |||
| v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy)); | |||
| v2 = image1d_ld4(b_blob_1d, 0); | |||
| bx = 0; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| else | |||
| { | |||
| // type 12 | |||
| v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy)); | |||
| v2 = image1d_ld4(b_blob_1d, gy); | |||
| bx = gy; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| } | |||
| } | |||
| else if (psc(adims) == 1) | |||
| { | |||
| if (psc(bdims) == 3) | |||
| if (psc(aw) == 1) | |||
| { | |||
| if (psc(aw) == 1) | |||
| { | |||
| // type 4 | |||
| v1 = image1d_ld4(a_blob_1d, 0); | |||
| v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| else | |||
| { | |||
| // type 9 | |||
| v1 = image1d_ld4(a_blob_1d, gz); | |||
| v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| // type 2 3 4 | |||
| ax = 0; | |||
| ay = 0; | |||
| az = 0; | |||
| } | |||
| if (psc(bdims) == 2) | |||
| else | |||
| { | |||
| if (psc(aw) == 1) | |||
| if (psc(bdims) == 3) | |||
| { | |||
| // type 3 | |||
| v1 = image1d_ld4(a_blob_1d, 0); | |||
| v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy)); | |||
| // type 9 | |||
| ax = gz; | |||
| ay = 0; | |||
| az = 0; | |||
| } | |||
| else | |||
| if (psc(bdims) == 2) | |||
| { | |||
| // type 8 | |||
| v1 = image1d_ld4(a_blob_1d, gy); | |||
| v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy)); | |||
| ax = gy; | |||
| ay = 0; | |||
| az = 0; | |||
| } | |||
| } | |||
| if (psc(bdims) == 1) | |||
| { | |||
| if (psc(aw) == 1) | |||
| if (psc(bdims) == 1) | |||
| { | |||
| // type 2 | |||
| v1 = image1d_ld4(a_blob_1d, 0); | |||
| v2 = image1d_ld4(b_blob_1d, gx); | |||
| } | |||
| else // if (psc(bw) == 1) | |||
| { | |||
| // type 6 | |||
| v1 = image1d_ld4(a_blob_1d, gx); | |||
| v2 = image1d_ld4(b_blob_1d, 0); | |||
| if (psc(bw) == 1) | |||
| { | |||
| // type 6 | |||
| bx = 0; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| afpvec4 v1 = image3d_ld4(a_blob_3d, ivec3(ax, ay, az)); | |||
| afpvec4 v2 = image3d_ld4(b_blob_3d, ivec3(bx, by, bz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -312,18 +306,7 @@ void main() | |||
| if (op_type == 8) res = v2 / v1; | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st4(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -44,14 +44,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D a_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D a_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D a_blob_3d; | |||
| layout (binding = 1) uniform unfp sampler1D b_blob_1d; | |||
| layout (binding = 1) uniform unfp sampler2D b_blob_2d; | |||
| layout (binding = 1) uniform unfp sampler3D b_blob_3d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer a_blob { sfpvec8 a_blob_data[]; }; | |||
| @@ -90,8 +84,12 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec8 v1; | |||
| afpvec8 v2; | |||
| int ax = gx; | |||
| int ay = gy; | |||
| int az = gz; | |||
| int bx = gx; | |||
| int by = gy; | |||
| int bz = gz; | |||
| if (psc(adims) == 3) | |||
| { | |||
| @@ -100,37 +98,36 @@ void main() | |||
| if (psc(bw) == 1 && psc(bh) == 1) | |||
| { | |||
| // special type 1 | |||
| v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image3d_ld8(b_blob_3d, ivec3(0, 0, gz)); | |||
| bx = 0; | |||
| by = 0; | |||
| } | |||
| if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(bc) == 1) | |||
| { | |||
| // special type 2 | |||
| v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, 0)); | |||
| bz = 0; | |||
| } | |||
| if (psc(aw) == 1 && psc(ah) == 1) | |||
| { | |||
| // special type 3 | |||
| v1 = image3d_ld8(a_blob_3d, ivec3(0, 0, gz)); | |||
| v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); | |||
| ax = 0; | |||
| ay = 0; | |||
| } | |||
| if (psc(bw) == psc(aw) && psc(bh) == psc(ah) && psc(ac) == 1) | |||
| { | |||
| // special type 4 | |||
| v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, 0)); | |||
| v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); | |||
| az = 0; | |||
| } | |||
| } | |||
| if (psc(bdims) == 2) | |||
| { | |||
| // type 18 | |||
| v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image2d_ld8(b_blob_2d, ivec2(gy, gz)); | |||
| bx = gy; | |||
| by = gz; | |||
| bz = 0; | |||
| } | |||
| if (psc(bdims) == 1) | |||
| @@ -138,14 +135,16 @@ void main() | |||
| if (psc(bw) == 1) | |||
| { | |||
| // type 16 | |||
| v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image1d_ld8(b_blob_1d, 0); | |||
| bx = 0; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| else | |||
| { | |||
| // type 17 | |||
| v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); | |||
| v2 = image1d_ld8(b_blob_1d, gz); | |||
| bx = gz; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| } | |||
| } | |||
| @@ -154,8 +153,9 @@ void main() | |||
| if (psc(bdims) == 3) | |||
| { | |||
| // type 14 | |||
| v1 = image2d_ld8(a_blob_2d, ivec2(gy, gz)); | |||
| v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); | |||
| ax = gy; | |||
| ay = gz; | |||
| az = 0; | |||
| } | |||
| if (psc(bdims) == 1) | |||
| @@ -163,67 +163,61 @@ void main() | |||
| if (psc(bw) == 1) | |||
| { | |||
| // type 11 | |||
| v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy)); | |||
| v2 = image1d_ld8(b_blob_1d, 0); | |||
| bx = 0; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| else | |||
| { | |||
| // type 12 | |||
| v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy)); | |||
| v2 = image1d_ld8(b_blob_1d, gy); | |||
| bx = gy; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| } | |||
| } | |||
| else if (psc(adims) == 1) | |||
| { | |||
| if (psc(bdims) == 3) | |||
| if (psc(aw) == 1) | |||
| { | |||
| if (psc(aw) == 1) | |||
| { | |||
| // type 4 | |||
| v1 = image1d_ld8(a_blob_1d, 0); | |||
| v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| else | |||
| { | |||
| // type 9 | |||
| v1 = image1d_ld8(a_blob_1d, gz); | |||
| v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| // type 2 3 4 | |||
| ax = 0; | |||
| ay = 0; | |||
| az = 0; | |||
| } | |||
| if (psc(bdims) == 2) | |||
| else | |||
| { | |||
| if (psc(aw) == 1) | |||
| if (psc(bdims) == 3) | |||
| { | |||
| // type 3 | |||
| v1 = image1d_ld8(a_blob_1d, 0); | |||
| v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy)); | |||
| // type 9 | |||
| ax = gz; | |||
| ay = 0; | |||
| az = 0; | |||
| } | |||
| else | |||
| if (psc(bdims) == 2) | |||
| { | |||
| // type 8 | |||
| v1 = image1d_ld8(a_blob_1d, gy); | |||
| v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy)); | |||
| ax = gy; | |||
| ay = 0; | |||
| az = 0; | |||
| } | |||
| } | |||
| if (psc(bdims) == 1) | |||
| { | |||
| if (psc(aw) == 1) | |||
| if (psc(bdims) == 1) | |||
| { | |||
| // type 2 | |||
| v1 = image1d_ld8(a_blob_1d, 0); | |||
| v2 = image1d_ld8(b_blob_1d, gx); | |||
| } | |||
| else // if (psc(bw) == 1) | |||
| { | |||
| // type 6 | |||
| v1 = image1d_ld8(a_blob_1d, gx); | |||
| v2 = image1d_ld8(b_blob_1d, 0); | |||
| if (psc(bw) == 1) | |||
| { | |||
| // type 6 | |||
| bx = 0; | |||
| by = 0; | |||
| bz = 0; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| afpvec8 v1 = image3d_ld8(a_blob_3d, ivec3(ax, ay, az)); | |||
| afpvec8 v2 = image3d_ld8(b_blob_3d, ivec3(bx, by, bz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -349,18 +343,7 @@ void main() | |||
| } | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st8(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -45,14 +45,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D a_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D a_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D a_blob_3d; | |||
| layout (binding = 1) uniform unfp sampler1D b_blob_1d; | |||
| layout (binding = 1) uniform unfp sampler2D b_blob_2d; | |||
| layout (binding = 1) uniform unfp sampler3D b_blob_3d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer a_blob { sfpvec4 a_blob_data[]; }; | |||
| @@ -91,20 +85,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec4 v1; | |||
| if (psc(outdims) == 1) | |||
| { | |||
| v1 = image1d_ld4(a_blob_1d, gx); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| v1 = image2d_ld4(a_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec4 v1 = image3d_ld4(a_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -129,18 +110,7 @@ void main() | |||
| if (op_type == 8) res = b / v1; | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st4(a_blob_data, gi, res); | |||
| #endif | |||
| @@ -149,20 +119,7 @@ void main() | |||
| { | |||
| // type 7 13 19 | |||
| #if NCNN_image_shader | |||
| afpvec4 v2; | |||
| if (psc(outdims) == 1) | |||
| { | |||
| v2 = image1d_ld4(b_blob_1d, gx); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| v2 = image2d_ld4(b_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec4 v2 = image3d_ld4(b_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| afpvec4 v2 = buffer_ld4(b_blob_data, gi); | |||
| #endif | |||
| @@ -178,18 +135,7 @@ void main() | |||
| if (op_type == 8) res = v2 / v1; | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st4(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -46,14 +46,8 @@ layout (constant_id = shape_constant_id_offset + 13) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 14) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D a_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D a_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D a_blob_3d; | |||
| layout (binding = 1) uniform unfp sampler1D b_blob_1d; | |||
| layout (binding = 1) uniform unfp sampler2D b_blob_2d; | |||
| layout (binding = 1) uniform unfp sampler3D b_blob_3d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer a_blob { sfpvec8 a_blob_data[]; }; | |||
| @@ -92,20 +86,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec8 v1; | |||
| if (psc(outdims) == 1) | |||
| { | |||
| v1 = image1d_ld8(a_blob_1d, gx); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| v1 = image2d_ld8(a_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec8 v1 = image3d_ld8(a_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -166,18 +147,7 @@ void main() | |||
| } | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st8(a_blob_data, gi, res); | |||
| #endif | |||
| @@ -186,20 +156,7 @@ void main() | |||
| { | |||
| // type 7 13 19 | |||
| #if NCNN_image_shader | |||
| afpvec8 v2; | |||
| if (psc(outdims) == 1) | |||
| { | |||
| v2 = image1d_ld8(b_blob_1d, gx); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| v2 = image2d_ld8(b_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec8 v2 = image3d_ld8(b_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| afpvec8 v2 = buffer_ld8(b_blob_data, gi); | |||
| #endif | |||
| @@ -251,18 +208,7 @@ void main() | |||
| } | |||
| #if NCNN_image_shader | |||
| if (psc(outdims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(outdims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(outdims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st8(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, r32f) writeonly uniform highp image1D top_blob_1d; | |||
| layout (binding = 1, r32f) writeonly uniform highp image2D top_blob_2d; | |||
| layout (binding = 1, r32f) writeonly uniform highp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| @@ -71,18 +67,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp1(top_blob_1d, gx, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, rgba32f) writeonly uniform highp image1D top_blob_1d; | |||
| layout (binding = 1, rgba32f) writeonly uniform highp image2D top_blob_2d; | |||
| layout (binding = 1, rgba32f) writeonly uniform highp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| @@ -71,18 +67,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp4(top_blob_1d, gx, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -36,11 +36,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, rgba32f) writeonly uniform highp image1D top_blob_1d; | |||
| layout (binding = 1, rgba32f) writeonly uniform highp image2D top_blob_2d; | |||
| layout (binding = 1, rgba32f) writeonly uniform highp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| @@ -72,18 +68,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp8(top_blob_1d, gx, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform highp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform highp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform highp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { float bottom_blob_data[]; }; | |||
| @@ -71,18 +67,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp1(top_blob_1d, gx, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -35,11 +35,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform highp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform highp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform highp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { vec4 bottom_blob_data[]; }; | |||
| @@ -71,18 +67,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp4(top_blob_1d, gx, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -36,11 +36,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform highp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform highp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform highp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { mat2x4 bottom_blob_data[]; }; | |||
| @@ -72,18 +68,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp8(top_blob_1d, gx, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(outcstep) + gy * psc(outw) + gx; | |||
| @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| @@ -61,19 +57,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afp v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld1(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -83,18 +67,7 @@ void main() | |||
| v = clamp(v, afp(const_min), afp(const_max)); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st1(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| @@ -61,19 +57,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec4 v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld4(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -83,18 +67,7 @@ void main() | |||
| v = clamp(v, afp(const_min), afp(const_max)); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -33,11 +33,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| @@ -62,19 +58,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec8 v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld8(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -85,18 +69,7 @@ void main() | |||
| v[1] = clamp(v[1], afp(const_min), afp(const_max)); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st8(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| @@ -74,29 +70,15 @@ void main() | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp1(top_blob_1d, gx + p.offset, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| if (axis == 0) image2d_cp1(top_blob_2d, ivec2(gx, gy + p.offset), bottom_blob_2d, ivec2(gx, gy)); | |||
| if (axis == 1) image2d_cp1(top_blob_2d, ivec2(gx + p.offset, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| if (axis == 0) image3d_cp1(top_blob_3d, ivec3(gx, gy, gz + p.offset), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (axis == 1) image3d_cp1(top_blob_3d, ivec3(gx, gy + p.offset, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (axis == 2) image3d_cp1(top_blob_3d, ivec3(gx + p.offset, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| ivec3 gxyz = ivec3(gx, gy, gz); | |||
| gxyz[psc(dims) - 1 - axis] += p.offset; | |||
| #if NCNN_image_shader | |||
| image3d_cp1(top_blob_3d, gxyz, bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| int v_offset = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; | |||
| buffer_cp1(top_blob_data, v_offset, bottom_blob_data, gi); | |||
| @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| @@ -74,29 +70,15 @@ void main() | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp4(top_blob_1d, gx + p.offset, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| if (axis == 0) image2d_cp4(top_blob_2d, ivec2(gx, gy + p.offset), bottom_blob_2d, ivec2(gx, gy)); | |||
| if (axis == 1) image2d_cp4(top_blob_2d, ivec2(gx + p.offset, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| if (axis == 0) image3d_cp4(top_blob_3d, ivec3(gx, gy, gz + p.offset), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (axis == 1) image3d_cp4(top_blob_3d, ivec3(gx, gy + p.offset, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (axis == 2) image3d_cp4(top_blob_3d, ivec3(gx + p.offset, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| ivec3 gxyz = ivec3(gx, gy, gz); | |||
| gxyz[psc(dims) - 1 - axis] += p.offset; | |||
| #if NCNN_image_shader | |||
| image3d_cp4(top_blob_3d, gxyz, bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| int v_offset = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; | |||
| buffer_cp4(top_blob_data, v_offset, bottom_blob_data, gi); | |||
| @@ -37,11 +37,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| @@ -74,81 +70,65 @@ void main() | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| ivec3 gxyz = ivec3(gx, gy, gz); | |||
| gxyz[psc(dims) - 1] *= 4; | |||
| gxyz[psc(dims) - 1 - axis] += p.offset; | |||
| #if NCNN_image_shader | |||
| afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (psc(dims) == 1) | |||
| { | |||
| afpvec4 v = image1d_ld4(bottom_blob_1d, gx); | |||
| int gx4 = gx * 4 + p.offset; | |||
| image1d_st1(top_blob_1d, gx4 + 0, v.r); | |||
| image1d_st1(top_blob_1d, gx4 + 1, v.g); | |||
| image1d_st1(top_blob_1d, gx4 + 2, v.b); | |||
| image1d_st1(top_blob_1d, gx4 + 3, v.a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, 0, 0), v.r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, 0, 0), v.g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, 0, 0), v.b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, 0, 0), v.a); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| afpvec4 v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); | |||
| if (axis == 0) | |||
| { | |||
| int gy4 = gy * 4 + p.offset; | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy4 + 0), v.r); | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy4 + 1), v.g); | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy4 + 2), v.b); | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy4 + 3), v.a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0), v.r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0), v.g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, 0), v.b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, 0), v.a); | |||
| } | |||
| if (axis == 1) | |||
| { | |||
| int gx4 = gx * 4 + p.offset; | |||
| image2d_st1(top_blob_2d, ivec2(gx4 + 0, gy), v.r); | |||
| image2d_st1(top_blob_2d, ivec2(gx4 + 1, gy), v.g); | |||
| image2d_st1(top_blob_2d, ivec2(gx4 + 2, gy), v.b); | |||
| image2d_st1(top_blob_2d, ivec2(gx4 + 3, gy), v.a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0), v.r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0), v.g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, 0), v.b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, 0), v.a); | |||
| } | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (axis == 0) | |||
| { | |||
| int gz4 = gz * 4 + p.offset; | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 0), v.r); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 1), v.g); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 2), v.b); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz4 + 3), v.a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0), v.r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1), v.g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 2), v.b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 3), v.a); | |||
| } | |||
| if (axis == 1) | |||
| { | |||
| int gy4 = gy * 4 + p.offset; | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy4 + 0, gz), v.r); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy4 + 1, gz), v.g); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy4 + 2, gz), v.b); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy4 + 3, gz), v.a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z), v.r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z), v.g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, gxyz.z), v.b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, gxyz.z), v.a); | |||
| } | |||
| if (axis == 2) | |||
| { | |||
| int gx4 = gx * 4 + p.offset; | |||
| image3d_st1(top_blob_3d, ivec3(gx4 + 0, gy, gz), v.r); | |||
| image3d_st1(top_blob_3d, ivec3(gx4 + 1, gy, gz), v.g); | |||
| image3d_st1(top_blob_3d, ivec3(gx4 + 2, gy, gz), v.b); | |||
| image3d_st1(top_blob_3d, ivec3(gx4 + 3, gy, gz), v.a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z), v.r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z), v.g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, gxyz.z), v.b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, gxyz.z), v.a); | |||
| } | |||
| } | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| ivec3 gxyz = ivec3(gx, gy, gz); | |||
| gxyz[psc(dims) - 1] *= 4; | |||
| gxyz[psc(dims) - 1 - axis] += p.offset; | |||
| int v_offset_0 = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; | |||
| ivec3 gxyz4 = ivec3(1, psc(outw), psc(outcstep)); | |||
| @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| @@ -75,29 +71,15 @@ void main() | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp8(top_blob_1d, gx + p.offset, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| if (axis == 0) image2d_cp8(top_blob_2d, ivec2(gx, gy + p.offset), bottom_blob_2d, ivec2(gx, gy)); | |||
| if (axis == 1) image2d_cp8(top_blob_2d, ivec2(gx + p.offset, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| if (axis == 0) image3d_cp8(top_blob_3d, ivec3(gx, gy, gz + p.offset), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (axis == 1) image3d_cp8(top_blob_3d, ivec3(gx, gy + p.offset, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (axis == 2) image3d_cp8(top_blob_3d, ivec3(gx + p.offset, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| ivec3 gxyz = ivec3(gx, gy, gz); | |||
| gxyz[psc(dims) - 1 - axis] += p.offset; | |||
| #if NCNN_image_shader | |||
| image3d_cp8(top_blob_3d, gxyz, bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| int v_offset = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; | |||
| buffer_cp8(top_blob_data, v_offset, bottom_blob_data, gi); | |||
| @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| @@ -75,105 +71,89 @@ void main() | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| ivec3 gxyz = ivec3(gx, gy, gz); | |||
| gxyz[psc(dims) - 1] *= 8; | |||
| gxyz[psc(dims) - 1 - axis] += p.offset; | |||
| #if NCNN_image_shader | |||
| afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (psc(dims) == 1) | |||
| { | |||
| afpvec8 v = image1d_ld8(bottom_blob_1d, gx); | |||
| int gx8 = gx * 8 + p.offset; | |||
| image1d_st1(top_blob_1d, gx8 + 0, v[0].r); | |||
| image1d_st1(top_blob_1d, gx8 + 1, v[0].g); | |||
| image1d_st1(top_blob_1d, gx8 + 2, v[0].b); | |||
| image1d_st1(top_blob_1d, gx8 + 3, v[0].a); | |||
| image1d_st1(top_blob_1d, gx8 + 4, v[1].r); | |||
| image1d_st1(top_blob_1d, gx8 + 5, v[1].g); | |||
| image1d_st1(top_blob_1d, gx8 + 6, v[1].b); | |||
| image1d_st1(top_blob_1d, gx8 + 7, v[1].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, 0, 0), v[0].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, 0, 0), v[0].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, 0, 0), v[0].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, 0, 0), v[0].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 4, 0, 0), v[1].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 5, 0, 0), v[1].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 6, 0, 0), v[1].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 7, 0, 0), v[1].a); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| afpvec8 v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); | |||
| if (axis == 0) | |||
| { | |||
| int gy8 = gy * 8 + p.offset; | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy8 + 0), v[0].r); | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy8 + 1), v[0].g); | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy8 + 2), v[0].b); | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy8 + 3), v[0].a); | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy8 + 4), v[1].r); | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy8 + 5), v[1].g); | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy8 + 6), v[1].b); | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy8 + 7), v[1].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0), v[0].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0), v[0].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, 0), v[0].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, 0), v[0].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 4, 0), v[1].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 5, 0), v[1].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 6, 0), v[1].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 7, 0), v[1].a); | |||
| } | |||
| if (axis == 1) | |||
| { | |||
| int gx8 = gx * 8 + p.offset; | |||
| image2d_st1(top_blob_2d, ivec2(gx8 + 0, gy), v[0].r); | |||
| image2d_st1(top_blob_2d, ivec2(gx8 + 1, gy), v[0].g); | |||
| image2d_st1(top_blob_2d, ivec2(gx8 + 2, gy), v[0].b); | |||
| image2d_st1(top_blob_2d, ivec2(gx8 + 3, gy), v[0].a); | |||
| image2d_st1(top_blob_2d, ivec2(gx8 + 4, gy), v[1].r); | |||
| image2d_st1(top_blob_2d, ivec2(gx8 + 5, gy), v[1].g); | |||
| image2d_st1(top_blob_2d, ivec2(gx8 + 6, gy), v[1].b); | |||
| image2d_st1(top_blob_2d, ivec2(gx8 + 7, gy), v[1].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0), v[0].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0), v[0].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, 0), v[0].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, 0), v[0].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 4, gxyz.y, 0), v[1].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 5, gxyz.y, 0), v[1].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 6, gxyz.y, 0), v[1].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 7, gxyz.y, 0), v[1].a); | |||
| } | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (axis == 0) | |||
| { | |||
| int gz8 = gz * 8 + p.offset; | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 0), v[0].r); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 1), v[0].g); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 2), v[0].b); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 3), v[0].a); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 4), v[1].r); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 5), v[1].g); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 6), v[1].b); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz8 + 7), v[1].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0), v[0].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1), v[0].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 2), v[0].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 3), v[0].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 4), v[1].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 5), v[1].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 6), v[1].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 7), v[1].a); | |||
| } | |||
| if (axis == 1) | |||
| { | |||
| int gy8 = gy * 8 + p.offset; | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy8 + 0, gz), v[0].r); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy8 + 1, gz), v[0].g); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy8 + 2, gz), v[0].b); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy8 + 3, gz), v[0].a); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy8 + 4, gz), v[1].r); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy8 + 5, gz), v[1].g); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy8 + 6, gz), v[1].b); | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy8 + 7, gz), v[1].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z), v[0].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z), v[0].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 2, gxyz.z), v[0].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 3, gxyz.z), v[0].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 4, gxyz.z), v[1].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 5, gxyz.z), v[1].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 6, gxyz.z), v[1].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x, gxyz.y + 7, gxyz.z), v[1].a); | |||
| } | |||
| if (axis == 2) | |||
| { | |||
| int gx8 = gx * 8 + p.offset; | |||
| image3d_st1(top_blob_3d, ivec3(gx8 + 0, gy, gz), v[0].r); | |||
| image3d_st1(top_blob_3d, ivec3(gx8 + 1, gy, gz), v[0].g); | |||
| image3d_st1(top_blob_3d, ivec3(gx8 + 2, gy, gz), v[0].b); | |||
| image3d_st1(top_blob_3d, ivec3(gx8 + 3, gy, gz), v[0].a); | |||
| image3d_st1(top_blob_3d, ivec3(gx8 + 4, gy, gz), v[1].r); | |||
| image3d_st1(top_blob_3d, ivec3(gx8 + 5, gy, gz), v[1].g); | |||
| image3d_st1(top_blob_3d, ivec3(gx8 + 6, gy, gz), v[1].b); | |||
| image3d_st1(top_blob_3d, ivec3(gx8 + 7, gy, gz), v[1].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z), v[0].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z), v[0].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 2, gxyz.y, gxyz.z), v[0].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 3, gxyz.y, gxyz.z), v[0].a); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 4, gxyz.y, gxyz.z), v[1].r); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 5, gxyz.y, gxyz.z), v[1].g); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 6, gxyz.y, gxyz.z), v[1].b); | |||
| image3d_st1(top_blob_3d, ivec3(gxyz.x + 7, gxyz.y, gxyz.z), v[1].a); | |||
| } | |||
| } | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| ivec3 gxyz = ivec3(gx, gy, gz); | |||
| gxyz[psc(dims) - 1] *= 8; | |||
| gxyz[psc(dims) - 1 - axis] += p.offset; | |||
| int v_offset_0 = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; | |||
| ivec3 gxyz4 = ivec3(1, psc(outw), psc(outcstep)); | |||
| @@ -38,11 +38,7 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| @@ -75,70 +71,54 @@ void main() | |||
| if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c)) | |||
| return; | |||
| ivec3 gxyz = ivec3(gx, gy, gz); | |||
| gxyz[psc(dims) - 1] *= 2; | |||
| gxyz[psc(dims) - 1 - axis] += p.offset; | |||
| #if NCNN_image_shader | |||
| afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (psc(dims) == 1) | |||
| { | |||
| afpvec8 v = image1d_ld8(bottom_blob_1d, gx); | |||
| int gx2 = gx * 2 + p.offset; | |||
| image1d_st4(top_blob_1d, gx2 + 0, v[0]); | |||
| image1d_st4(top_blob_1d, gx2 + 1, v[1]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x + 0, 0, 0), v[0]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x + 1, 0, 0), v[1]); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| afpvec8 v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); | |||
| if (axis == 0) | |||
| { | |||
| int gy2 = gy * 2 + p.offset; | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy2 + 0), v[0]); | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy2 + 1), v[1]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, 0), v[0]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, 0), v[1]); | |||
| } | |||
| if (axis == 1) | |||
| { | |||
| int gx2 = gx * 2 + p.offset; | |||
| image2d_st4(top_blob_2d, ivec2(gx2 + 0, gy), v[0]); | |||
| image2d_st4(top_blob_2d, ivec2(gx2 + 1, gy), v[1]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, 0), v[0]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, 0), v[1]); | |||
| } | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| if (axis == 0) | |||
| { | |||
| int gz2 = gz * 2 + p.offset; | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz2 + 0), v[0]); | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz2 + 1), v[1]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 0), v[0]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y, gxyz.z + 1), v[1]); | |||
| } | |||
| if (axis == 1) | |||
| { | |||
| int gy2 = gy * 2 + p.offset; | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy2 + 0, gz), v[0]); | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy2 + 1, gz), v[1]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 0, gxyz.z), v[0]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x, gxyz.y + 1, gxyz.z), v[1]); | |||
| } | |||
| if (axis == 2) | |||
| { | |||
| int gx2 = gx * 2 + p.offset; | |||
| image3d_st4(top_blob_3d, ivec3(gx2 + 0, gy, gz), v[0]); | |||
| image3d_st4(top_blob_3d, ivec3(gx2 + 1, gy, gz), v[1]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x + 0, gxyz.y, gxyz.z), v[0]); | |||
| image3d_st4(top_blob_3d, ivec3(gxyz.x + 1, gxyz.y, gxyz.z), v[1]); | |||
| } | |||
| } | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| ivec3 gxyz = ivec3(gx, gy, gz); | |||
| gxyz[psc(dims) - 1] *= 2; | |||
| gxyz[psc(dims) - 1 - axis] += p.offset; | |||
| int v_offset_0 = gxyz.z * psc(outcstep) + gxyz.y * psc(outw) + gxyz.x; | |||
| ivec3 gxyz4 = ivec3(1, psc(outw), psc(outcstep)); | |||
| @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -86,7 +86,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| #if NCNN_fp16_packed | |||
| layout (binding = 0) readonly buffer bottom_blob { vec4 bottom_blob_data[]; }; | |||
| @@ -100,7 +100,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = afpvec4(image1d_ld1(bias_blob, gz)); | |||
| sum = afpvec4(image3d_ld1(bias_blob, ivec3(gz, 0, 0))); | |||
| #else | |||
| sum = afpvec4(buffer_ld1(bias_data, gz)); | |||
| #endif | |||
| @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -86,7 +86,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -91,7 +91,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -103,7 +103,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| afpvec4 b = image1d_ld4(bias_blob, gz); | |||
| afpvec4 b = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| afpvec4 b = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -40,7 +40,7 @@ layout (constant_id = shape_constant_id_offset + 6) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler3D top_tm_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 2) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer top_tm_blob { sfpvec4 top_tm_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -129,7 +129,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| const afpvec4 bias_value = image1d_ld4(bias_blob, gz); | |||
| const afpvec4 bias_value = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| const afpvec4 bias_value = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -86,7 +86,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -99,7 +99,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| afpvec8 b = image1d_ld8(bias_blob, gz); | |||
| afpvec8 b = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| afpvec8 b = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -41,7 +41,7 @@ layout (constant_id = shape_constant_id_offset + 6) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler3D top_tm_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 2) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer top_tm_blob { sfpvec8 top_tm_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -130,7 +130,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| const afpvec8 bias_value = image1d_ld8(bias_blob, gz); | |||
| const afpvec8 bias_value = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| const afpvec8 bias_value = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler2D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -108,7 +108,7 @@ void main() | |||
| for (int x = 0; x < kernel_w; x++) | |||
| { | |||
| sum += image2d_ld1(weight_blob, ivec2(wx, gz)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz)); | |||
| sum += image3d_ld1(weight_blob, ivec3(wx, gz, 0)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz)); | |||
| sx += dilation_w; | |||
| wx += 1; | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -92,7 +92,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler2D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -110,7 +110,7 @@ void main() | |||
| { | |||
| afpvec4 v = image3d_ld4(bottom_blob, ivec3(sx, sy, gz)); | |||
| afpvec4 k = image2d_ld4(weight_blob, ivec2(wx, gz)); | |||
| afpvec4 k = image3d_ld4(weight_blob, ivec3(wx, gz, 0)); | |||
| sum += v * k; | |||
| @@ -50,8 +50,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler2D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -111,7 +111,7 @@ void main() | |||
| { | |||
| afpvec8 v = image3d_ld8(bottom_blob, ivec3(sx, sy, gz)); | |||
| afpvec8 k = image2d_ld8(weight_blob, ivec2(wx, gz)); | |||
| afpvec8 k = image3d_ld8(weight_blob, ivec3(wx, gz, 0)); | |||
| // sum += v * k; | |||
| sum[0] += v[0] * k[0]; | |||
| @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -86,7 +86,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -86,7 +86,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -91,7 +91,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -49,7 +49,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -86,7 +86,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler2D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -124,7 +124,7 @@ void main() | |||
| int wx = y * kernel_w + x; | |||
| sum += image2d_ld1(weight_blob, ivec2(wx, gz)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz)); | |||
| sum += image3d_ld1(weight_blob, ivec3(wx, gz, 0)) * image3d_ld1(bottom_blob, ivec3(sx, sy, gz)); | |||
| } | |||
| } | |||
| #else | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -92,7 +92,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -50,7 +50,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld1(bias_blob, gz); | |||
| sum = image3d_ld1(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld1(bias_data, gz); | |||
| #endif | |||
| @@ -51,7 +51,7 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -49,8 +49,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler2D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -87,7 +87,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld4(bias_blob, gz); | |||
| sum = image3d_ld4(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld4(bias_data, gz); | |||
| #endif | |||
| @@ -126,7 +126,7 @@ void main() | |||
| afpvec4 v = image3d_ld4(bottom_blob, ivec3(sx, sy, gz)); | |||
| afpvec4 k = image2d_ld4(weight_blob, ivec2(wx, gz)); | |||
| afpvec4 k = image3d_ld4(weight_blob, ivec3(wx, gz, 0)); | |||
| sum += v * k; | |||
| } | |||
| @@ -50,8 +50,8 @@ layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| layout (binding = 2) uniform unfp sampler2D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler1D bias_blob; | |||
| layout (binding = 2) uniform unfp sampler3D weight_blob; | |||
| layout (binding = 3) uniform unfp sampler3D bias_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -88,7 +88,7 @@ void main() | |||
| if (bias_term == 1) | |||
| { | |||
| #if NCNN_image_shader | |||
| sum = image1d_ld8(bias_blob, gz); | |||
| sum = image3d_ld8(bias_blob, ivec3(gz, 0, 0)); | |||
| #else | |||
| sum = buffer_ld8(bias_data, gz); | |||
| #endif | |||
| @@ -127,7 +127,7 @@ void main() | |||
| afpvec8 v = image3d_ld8(bottom_blob, ivec3(sx, sy, gz)); | |||
| afpvec8 k = image2d_ld8(weight_blob, ivec2(wx, gz)); | |||
| afpvec8 k = image3d_ld8(weight_blob, ivec3(wx, gz, 0)); | |||
| // sum += v * k; | |||
| sum[0] += v[0] * k[0]; | |||
| @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| @@ -59,18 +55,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp1(top_blob_1d, gx, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_cp1(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -29,11 +29,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; }; | |||
| @@ -59,18 +55,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp4(top_blob_1d, gx, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_cp4(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| image3d_cp4(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -30,11 +30,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; }; | |||
| @@ -60,18 +56,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_cp8(top_blob_1d, gx, bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_cp8(top_blob_2d, ivec2(gx, gy), bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| image3d_cp8(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -31,11 +31,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; }; | |||
| @@ -60,19 +56,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afp v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld1(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -82,18 +66,7 @@ void main() | |||
| v *= afp(scale); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st1(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -31,11 +31,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| @@ -60,19 +56,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec4 v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld4(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -82,18 +66,7 @@ void main() | |||
| v *= afp(scale); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| @@ -61,19 +57,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec8 v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld8(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -84,18 +68,7 @@ void main() | |||
| v[1] = v[1] * afp(scale); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st8(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -32,14 +32,8 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob1_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob1_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob1_3d; | |||
| layout (binding = 1) uniform unfp sampler1D bottom_blob2_1d; | |||
| layout (binding = 1) uniform unfp sampler2D bottom_blob2_2d; | |||
| layout (binding = 1) uniform unfp sampler3D bottom_blob2_3d; | |||
| layout (binding = 2, imfmtc1) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc1) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc1) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob1 { sfp bottom_blob1_data[]; }; | |||
| @@ -69,23 +63,8 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afp v1; | |||
| afp v2; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v1 = image1d_ld1(bottom_blob1_1d, gx); | |||
| v2 = image1d_ld1(bottom_blob2_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v1 = image2d_ld1(bottom_blob1_2d, ivec2(gx, gy)); | |||
| v2 = image2d_ld1(bottom_blob2_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v1 = image3d_ld1(bottom_blob1_3d, ivec3(gx, gy, gz)); | |||
| v2 = image3d_ld1(bottom_blob2_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afp v1 = image3d_ld1(bottom_blob1_3d, ivec3(gx, gy, gz)); | |||
| afp v2 = image3d_ld1(bottom_blob2_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -119,18 +98,7 @@ void main() | |||
| } | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st1(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st1(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -32,14 +32,8 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob1_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob1_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob1_3d; | |||
| layout (binding = 1) uniform unfp sampler1D bottom_blob2_1d; | |||
| layout (binding = 1) uniform unfp sampler2D bottom_blob2_2d; | |||
| layout (binding = 1) uniform unfp sampler3D bottom_blob2_3d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob1 { sfpvec4 bottom_blob1_data[]; }; | |||
| @@ -69,23 +63,8 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec4 v1; | |||
| afpvec4 v2; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v1 = image1d_ld4(bottom_blob1_1d, gx); | |||
| v2 = image1d_ld4(bottom_blob2_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v1 = image2d_ld4(bottom_blob1_2d, ivec2(gx, gy)); | |||
| v2 = image2d_ld4(bottom_blob2_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v1 = image3d_ld4(bottom_blob1_3d, ivec3(gx, gy, gz)); | |||
| v2 = image3d_ld4(bottom_blob2_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec4 v1 = image3d_ld4(bottom_blob1_3d, ivec3(gx, gy, gz)); | |||
| afpvec4 v2 = image3d_ld4(bottom_blob2_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -119,18 +98,7 @@ void main() | |||
| } | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st4(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -33,14 +33,8 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob1_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob1_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob1_3d; | |||
| layout (binding = 1) uniform unfp sampler1D bottom_blob2_1d; | |||
| layout (binding = 1) uniform unfp sampler2D bottom_blob2_2d; | |||
| layout (binding = 1) uniform unfp sampler3D bottom_blob2_3d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 2, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob1 { sfpvec8 bottom_blob1_data[]; }; | |||
| @@ -70,23 +64,8 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec8 v1; | |||
| afpvec8 v2; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v1 = image1d_ld8(bottom_blob1_1d, gx); | |||
| v2 = image1d_ld8(bottom_blob2_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v1 = image2d_ld8(bottom_blob1_2d, ivec2(gx, gy)); | |||
| v2 = image2d_ld8(bottom_blob2_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v1 = image3d_ld8(bottom_blob1_3d, ivec3(gx, gy, gz)); | |||
| v2 = image3d_ld8(bottom_blob2_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec8 v1 = image3d_ld8(bottom_blob1_3d, ivec3(gx, gy, gz)); | |||
| afpvec8 v2 = image3d_ld8(bottom_blob2_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -138,18 +117,7 @@ void main() | |||
| } | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, res); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), res); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), res); | |||
| #else | |||
| buffer_st8(top_blob_data, gi, res); | |||
| #endif | |||
| @@ -35,9 +35,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image1D top_blob; | |||
| layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; }; | |||
| @@ -78,14 +77,14 @@ void main() | |||
| if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld1(bottom_blob_2d, ivec2(x, y)); | |||
| v = image3d_ld1(bottom_blob_3d, ivec3(x, y, 0)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld1(bottom_blob_3d, ivec3(x, y, z)); | |||
| } | |||
| image1d_st1(top_blob, gx, v); | |||
| image3d_st1(top_blob, ivec3(gx, 0, 0), v); | |||
| #else | |||
| int v_offset = z * psc(cstep) + y * psc(w) + x; | |||
| @@ -35,9 +35,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; }; | |||
| @@ -77,10 +76,10 @@ void main() | |||
| ivec4 y4 = i4 / psc(w); | |||
| ivec4 x4 = i4 % psc(w); | |||
| v.r = image2d_ld1(bottom_blob_2d, ivec2(x4.r, y4.r)); | |||
| v.g = image2d_ld1(bottom_blob_2d, ivec2(x4.g, y4.g)); | |||
| v.b = image2d_ld1(bottom_blob_2d, ivec2(x4.b, y4.b)); | |||
| v.a = image2d_ld1(bottom_blob_2d, ivec2(x4.a, y4.a)); | |||
| v.r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, y4.r, 0)); | |||
| v.g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, y4.g, 0)); | |||
| v.b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, y4.b, 0)); | |||
| v.a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, 0)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| @@ -96,7 +95,7 @@ void main() | |||
| v.a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, z4.a)); | |||
| } | |||
| image1d_st4(top_blob, gx, v); | |||
| image3d_st4(top_blob, ivec3(gx, 0, 0), v); | |||
| #else | |||
| ivec4 v_offset; | |||
| @@ -36,9 +36,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| #else | |||
| layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; }; | |||
| layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; }; | |||
| @@ -82,14 +81,14 @@ void main() | |||
| ivec4 yy4 = ii4 / psc(w); | |||
| ivec4 xx4 = ii4 % psc(w); | |||
| v[0].r = image2d_ld1(bottom_blob_2d, ivec2(x4.r, y4.r)); | |||
| v[0].g = image2d_ld1(bottom_blob_2d, ivec2(x4.g, y4.g)); | |||
| v[0].b = image2d_ld1(bottom_blob_2d, ivec2(x4.b, y4.b)); | |||
| v[0].a = image2d_ld1(bottom_blob_2d, ivec2(x4.a, y4.a)); | |||
| v[1].r = image2d_ld1(bottom_blob_2d, ivec2(xx4.r, yy4.r)); | |||
| v[1].g = image2d_ld1(bottom_blob_2d, ivec2(xx4.g, yy4.g)); | |||
| v[1].b = image2d_ld1(bottom_blob_2d, ivec2(xx4.b, yy4.b)); | |||
| v[1].a = image2d_ld1(bottom_blob_2d, ivec2(xx4.a, yy4.a)); | |||
| v[0].r = image3d_ld1(bottom_blob_3d, ivec3(x4.r, y4.r, 0)); | |||
| v[0].g = image3d_ld1(bottom_blob_3d, ivec3(x4.g, y4.g, 0)); | |||
| v[0].b = image3d_ld1(bottom_blob_3d, ivec3(x4.b, y4.b, 0)); | |||
| v[0].a = image3d_ld1(bottom_blob_3d, ivec3(x4.a, y4.a, 0)); | |||
| v[1].r = image3d_ld1(bottom_blob_3d, ivec3(xx4.r, yy4.r, 0)); | |||
| v[1].g = image3d_ld1(bottom_blob_3d, ivec3(xx4.g, yy4.g, 0)); | |||
| v[1].b = image3d_ld1(bottom_blob_3d, ivec3(xx4.b, yy4.b, 0)); | |||
| v[1].a = image3d_ld1(bottom_blob_3d, ivec3(xx4.a, yy4.a, 0)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| @@ -113,7 +112,7 @@ void main() | |||
| v[1].a = image3d_ld1(bottom_blob_3d, ivec3(xx4.a, yy4.a, zz4.a)); | |||
| } | |||
| image1d_st8(top_blob, gx, v); | |||
| image3d_st8(top_blob, ivec3(gx, 0, 0), v); | |||
| #else | |||
| ivec4 v_offset; | |||
| ivec4 vv_offset; | |||
| @@ -35,9 +35,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| #else | |||
| #if NCNN_fp16_packed | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec2 bottom_blob_data[]; }; | |||
| @@ -81,10 +80,10 @@ void main() | |||
| ivec4 y4 = i4 / psc(w); | |||
| ivec4 x4 = i4 % psc(w); | |||
| afpvec4 v0 = image2d_ld4(bottom_blob_2d, ivec2(x4.r, y4.r / 4)); | |||
| afpvec4 v1 = image2d_ld4(bottom_blob_2d, ivec2(x4.g, y4.g / 4)); | |||
| afpvec4 v2 = image2d_ld4(bottom_blob_2d, ivec2(x4.b, y4.b / 4)); | |||
| afpvec4 v3 = image2d_ld4(bottom_blob_2d, ivec2(x4.a, y4.a / 4)); | |||
| afpvec4 v0 = image3d_ld4(bottom_blob_3d, ivec3(x4.r, y4.r / 4, 0)); | |||
| afpvec4 v1 = image3d_ld4(bottom_blob_3d, ivec3(x4.g, y4.g / 4, 0)); | |||
| afpvec4 v2 = image3d_ld4(bottom_blob_3d, ivec3(x4.b, y4.b / 4, 0)); | |||
| afpvec4 v3 = image3d_ld4(bottom_blob_3d, ivec3(x4.a, y4.a / 4, 0)); | |||
| v.r = v0[y4.r % 4]; | |||
| v.g = v1[y4.g % 4]; | |||
| @@ -110,7 +109,7 @@ void main() | |||
| v.a = v3[z4.a % 4]; | |||
| } | |||
| image1d_st4(top_blob, gx, v); | |||
| image3d_st4(top_blob, ivec3(gx, 0, 0), v); | |||
| #else | |||
| #if NCNN_fp16_packed | |||
| ivec4 v_offset; | |||
| @@ -36,9 +36,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| #else | |||
| #if NCNN_fp16_packed | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec2 bottom_blob_data[]; }; | |||
| @@ -85,14 +84,14 @@ void main() | |||
| ivec4 yy4 = ii4 / psc(w); | |||
| ivec4 xx4 = ii4 % psc(w); | |||
| afpvec4 v0 = image2d_ld4(bottom_blob_2d, ivec2(x4.r, y4.r / 4)); | |||
| afpvec4 v1 = image2d_ld4(bottom_blob_2d, ivec2(x4.g, y4.g / 4)); | |||
| afpvec4 v2 = image2d_ld4(bottom_blob_2d, ivec2(x4.b, y4.b / 4)); | |||
| afpvec4 v3 = image2d_ld4(bottom_blob_2d, ivec2(x4.a, y4.a / 4)); | |||
| afpvec4 v4 = image2d_ld4(bottom_blob_2d, ivec2(xx4.r, yy4.r / 4)); | |||
| afpvec4 v5 = image2d_ld4(bottom_blob_2d, ivec2(xx4.g, yy4.g / 4)); | |||
| afpvec4 v6 = image2d_ld4(bottom_blob_2d, ivec2(xx4.b, yy4.b / 4)); | |||
| afpvec4 v7 = image2d_ld4(bottom_blob_2d, ivec2(xx4.a, yy4.a / 4)); | |||
| afpvec4 v0 = image3d_ld4(bottom_blob_3d, ivec3(x4.r, y4.r / 4, 0)); | |||
| afpvec4 v1 = image3d_ld4(bottom_blob_3d, ivec3(x4.g, y4.g / 4, 0)); | |||
| afpvec4 v2 = image3d_ld4(bottom_blob_3d, ivec3(x4.b, y4.b / 4, 0)); | |||
| afpvec4 v3 = image3d_ld4(bottom_blob_3d, ivec3(x4.a, y4.a / 4, 0)); | |||
| afpvec4 v4 = image3d_ld4(bottom_blob_3d, ivec3(xx4.r, yy4.r / 4, 0)); | |||
| afpvec4 v5 = image3d_ld4(bottom_blob_3d, ivec3(xx4.g, yy4.g / 4, 0)); | |||
| afpvec4 v6 = image3d_ld4(bottom_blob_3d, ivec3(xx4.b, yy4.b / 4, 0)); | |||
| afpvec4 v7 = image3d_ld4(bottom_blob_3d, ivec3(xx4.a, yy4.a / 4, 0)); | |||
| v[0].r = v0[y4.r % 4]; | |||
| v[0].g = v1[y4.g % 4]; | |||
| @@ -133,7 +132,7 @@ void main() | |||
| v[1].a = v7[zz4.a % 4]; | |||
| } | |||
| image1d_st8(top_blob, gx, v); | |||
| image3d_st8(top_blob, ivec3(gx, 0, 0), v); | |||
| #else | |||
| #if NCNN_fp16_packed | |||
| ivec4 v_offset; | |||
| @@ -36,9 +36,8 @@ layout (constant_id = shape_constant_id_offset + 8) const int outc = 0; | |||
| layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob; | |||
| #else | |||
| #if NCNN_fp16_packed | |||
| layout (binding = 0) readonly buffer bottom_blob { sfpvec2 bottom_blob_data[]; }; | |||
| @@ -85,14 +84,14 @@ void main() | |||
| ivec4 yy4 = ii4 / psc(w); | |||
| ivec4 xx4 = ii4 % psc(w); | |||
| afpvec8 v0 = image2d_ld8(bottom_blob_2d, ivec2(x4.r, y4.r / 8)); | |||
| afpvec8 v1 = image2d_ld8(bottom_blob_2d, ivec2(x4.g, y4.g / 8)); | |||
| afpvec8 v2 = image2d_ld8(bottom_blob_2d, ivec2(x4.b, y4.b / 8)); | |||
| afpvec8 v3 = image2d_ld8(bottom_blob_2d, ivec2(x4.a, y4.a / 8)); | |||
| afpvec8 v4 = image2d_ld8(bottom_blob_2d, ivec2(xx4.r, yy4.r / 8)); | |||
| afpvec8 v5 = image2d_ld8(bottom_blob_2d, ivec2(xx4.g, yy4.g / 8)); | |||
| afpvec8 v6 = image2d_ld8(bottom_blob_2d, ivec2(xx4.b, yy4.b / 8)); | |||
| afpvec8 v7 = image2d_ld8(bottom_blob_2d, ivec2(xx4.a, yy4.a / 8)); | |||
| afpvec8 v0 = image3d_ld8(bottom_blob_3d, ivec3(x4.r, y4.r / 8, 0)); | |||
| afpvec8 v1 = image3d_ld8(bottom_blob_3d, ivec3(x4.g, y4.g / 8, 0)); | |||
| afpvec8 v2 = image3d_ld8(bottom_blob_3d, ivec3(x4.b, y4.b / 8, 0)); | |||
| afpvec8 v3 = image3d_ld8(bottom_blob_3d, ivec3(x4.a, y4.a / 8, 0)); | |||
| afpvec8 v4 = image3d_ld8(bottom_blob_3d, ivec3(xx4.r, yy4.r / 8, 0)); | |||
| afpvec8 v5 = image3d_ld8(bottom_blob_3d, ivec3(xx4.g, yy4.g / 8, 0)); | |||
| afpvec8 v6 = image3d_ld8(bottom_blob_3d, ivec3(xx4.b, yy4.b / 8, 0)); | |||
| afpvec8 v7 = image3d_ld8(bottom_blob_3d, ivec3(xx4.a, yy4.a / 8, 0)); | |||
| v[0].r = v0[(y4.r % 8) / 4][y4.r % 4]; | |||
| v[0].g = v1[(y4.g % 8) / 4][y4.g % 4]; | |||
| @@ -133,7 +132,7 @@ void main() | |||
| v[1].a = v7[(zz4.a % 8) / 4][zz4.a % 4]; | |||
| } | |||
| image1d_st8(top_blob, gx, v); | |||
| image3d_st8(top_blob, ivec3(gx, 0, 0), v); | |||
| #else | |||
| #if NCNN_fp16_packed | |||
| ivec4 v_offset; | |||
| @@ -61,19 +61,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afp v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld1(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld1(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afp v = image3d_ld1(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -83,18 +71,7 @@ void main() | |||
| v = clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st1(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st1(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st1(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st1(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -32,11 +32,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; }; | |||
| @@ -61,19 +57,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec4 v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld4(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld4(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -83,18 +67,7 @@ void main() | |||
| v = clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st4(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st4(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st4(bottom_top_blob_data, gi, v); | |||
| #endif | |||
| @@ -33,11 +33,7 @@ layout (constant_id = shape_constant_id_offset + 3) const int c = 0; | |||
| layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0) uniform unfp sampler1D bottom_blob_1d; | |||
| layout (binding = 0) uniform unfp sampler2D bottom_blob_2d; | |||
| layout (binding = 0) uniform unfp sampler3D bottom_blob_3d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image1D top_blob_1d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image2D top_blob_2d; | |||
| layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d; | |||
| #else | |||
| layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; }; | |||
| @@ -62,19 +58,7 @@ void main() | |||
| return; | |||
| #if NCNN_image_shader | |||
| afpvec8 v; | |||
| if (psc(dims) == 1) | |||
| { | |||
| v = image1d_ld8(bottom_blob_1d, gx); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| v = image2d_ld8(bottom_blob_2d, ivec2(gx, gy)); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| } | |||
| afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz)); | |||
| #else | |||
| const int gi = gz * psc(cstep) + gy * psc(w) + gx; | |||
| @@ -85,18 +69,7 @@ void main() | |||
| v[1] = clamp(v[1] * afp(alpha) + afp(beta), afp(0.f), afp(1.f)); | |||
| #if NCNN_image_shader | |||
| if (psc(dims) == 1) | |||
| { | |||
| image1d_st8(top_blob_1d, gx, v); | |||
| } | |||
| else if (psc(dims) == 2) | |||
| { | |||
| image2d_st8(top_blob_2d, ivec2(gx, gy), v); | |||
| } | |||
| else // if (psc(dims) == 3) | |||
| { | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| } | |||
| image3d_st8(top_blob_3d, ivec3(gx, gy, gz), v); | |||
| #else | |||
| buffer_st8(bottom_top_blob_data, gi, v); | |||
| #endif | |||