diff --git a/glslang b/glslang index a9ac7d5f3..f3b687b3b 160000 --- a/glslang +++ b/glslang @@ -1 +1 @@ -Subproject commit a9ac7d5f307e5db5b8c4fbf904bdba8fca6283bc +Subproject commit f3b687b3b9f844c63eb1889b3d951a05bb5c5a87 diff --git a/src/gpu.cpp b/src/gpu.cpp index 0cfca0505..9b9c5ca8a 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -334,8 +334,10 @@ public: int support_VK_KHR_portability_subset; int support_VK_KHR_push_descriptor; int support_VK_KHR_sampler_ycbcr_conversion; + int support_VK_KHR_shader_bfloat16; int support_VK_KHR_shader_float16_int8; int support_VK_KHR_shader_float_controls; + int support_VK_KHR_shader_integer_dot_product; int support_VK_KHR_shader_non_semantic_info; int support_VK_KHR_shader_subgroup_extended_types; int support_VK_KHR_shader_subgroup_rotate; @@ -364,6 +366,8 @@ public: VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR querySamplerYcbcrConversionFeatures; VkPhysicalDeviceCooperativeMatrixFeaturesKHR queryCooperativeMatrixFeatures; VkPhysicalDeviceCooperativeMatrixFeaturesNV queryCooperativeMatrixFeaturesNV; + VkPhysicalDeviceShaderBfloat16FeaturesKHR queryShaderBfloat16Features; + VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR queryShaderIntegerDotProductFeatures; VkPhysicalDeviceSubgroupSizeControlFeaturesEXT querySubgroupSizeControlFeatures; VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR queryShaderSubgroupRotateFeatures; VkPhysicalDeviceShaderAtomicFloatFeaturesEXT queryShaderAtomicFloatFeatures; @@ -371,6 +375,7 @@ public: // extension properties void* queryDeviceProperties; + VkPhysicalDeviceShaderIntegerDotProductProperties queryShaderIntegerDotProductProperties; VkPhysicalDeviceSubgroupProperties querySubgroupProperties; VkPhysicalDeviceDriverPropertiesKHR queryDriverProperties; VkPhysicalDeviceSubgroupSizeControlPropertiesEXT querySubgroupSizeControlProperties; @@ -671,8 +676,10 @@ int GpuInfoPrivate::query_extensions() support_VK_KHR_portability_subset = 0; support_VK_KHR_push_descriptor = 0; 
support_VK_KHR_sampler_ycbcr_conversion = 0; + support_VK_KHR_shader_bfloat16 = 0; support_VK_KHR_shader_float16_int8 = 0; support_VK_KHR_shader_float_controls = 0; + support_VK_KHR_shader_integer_dot_product = 0; support_VK_KHR_shader_non_semantic_info = 0; support_VK_KHR_shader_subgroup_extended_types = 0; support_VK_KHR_shader_subgroup_rotate = 0; @@ -733,10 +740,14 @@ int GpuInfoPrivate::query_extensions() support_VK_KHR_push_descriptor = exp.specVersion; else if (strcmp(exp.extensionName, "VK_KHR_sampler_ycbcr_conversion") == 0) support_VK_KHR_sampler_ycbcr_conversion = exp.specVersion; + else if (strcmp(exp.extensionName, "VK_KHR_shader_bfloat16") == 0) + support_VK_KHR_shader_bfloat16 = exp.specVersion; else if (strcmp(exp.extensionName, "VK_KHR_shader_float16_int8") == 0) support_VK_KHR_shader_float16_int8 = exp.specVersion; else if (strcmp(exp.extensionName, "VK_KHR_shader_float_controls") == 0) support_VK_KHR_shader_float_controls = exp.specVersion; + else if (strcmp(exp.extensionName, "VK_KHR_shader_integer_dot_product") == 0) + support_VK_KHR_shader_integer_dot_product = exp.specVersion; else if (strcmp(exp.extensionName, "VK_KHR_shader_non_semantic_info") == 0) support_VK_KHR_shader_non_semantic_info = exp.specVersion; else if (strcmp(exp.extensionName, "VK_KHR_shader_subgroup_extended_types") == 0) @@ -852,6 +863,26 @@ void GpuInfoPrivate::query_extension_features() queryExtensionFeatures = &queryCooperativeMatrixFeaturesNV; } + // query bfloat16 + memset(&queryShaderBfloat16Features, 0, sizeof(queryShaderBfloat16Features)); + queryShaderBfloat16Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_BFLOAT16_FEATURES_KHR; + queryShaderBfloat16Features.pNext = 0; + if (support_VK_KHR_shader_bfloat16) + { + queryShaderBfloat16Features.pNext = queryExtensionFeatures; + queryExtensionFeatures = &queryShaderBfloat16Features; + } + + // query integer dot product + memset(&queryShaderIntegerDotProductFeatures, 0, 
sizeof(queryShaderIntegerDotProductFeatures)); + queryShaderIntegerDotProductFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES_KHR; + queryShaderIntegerDotProductFeatures.pNext = 0; + if (support_VK_KHR_shader_integer_dot_product) + { + queryShaderIntegerDotProductFeatures.pNext = queryExtensionFeatures; + queryExtensionFeatures = &queryShaderIntegerDotProductFeatures; + } + // query subgroup size control memset(&querySubgroupSizeControlFeatures, 0, sizeof(querySubgroupSizeControlFeatures)); querySubgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT; @@ -931,6 +962,16 @@ void GpuInfoPrivate::query_extension_properties() { queryDeviceProperties = 0; + // query integer dot product properties, chained into queryDeviceProperties only when VK_KHR_shader_integer_dot_product is reported by the device + memset(&queryShaderIntegerDotProductProperties, 0, sizeof(queryShaderIntegerDotProductProperties)); + queryShaderIntegerDotProductProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES_KHR; + queryShaderIntegerDotProductProperties.pNext = 0; + if (support_VK_KHR_shader_integer_dot_product) + { + queryShaderIntegerDotProductProperties.pNext = queryDeviceProperties; + queryDeviceProperties = &queryShaderIntegerDotProductProperties; + } + // query subgroup memset(&querySubgroupProperties, 0, sizeof(querySubgroupProperties)); querySubgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; @@ -1455,6 +1496,11 @@ bool GpuInfo::support_fp16_image() const return d->physicalDevicefeatures.shaderStorageImageExtendedFormats; } +bool GpuInfo::support_int8_image() const +{ + return d->physicalDevicefeatures.shaderStorageImageExtendedFormats; +} + bool GpuInfo::support_ycbcr_conversion() const { return d->querySamplerYcbcrConversionFeatures.samplerYcbcrConversion; @@ -1575,6 +1621,11 @@ int GpuInfo::support_VK_KHR_sampler_ycbcr_conversion() const return d->support_VK_KHR_sampler_ycbcr_conversion; } +int GpuInfo::support_VK_KHR_shader_bfloat16() const +{ + return 
d->support_VK_KHR_shader_bfloat16; +} + int GpuInfo::support_VK_KHR_shader_float16_int8() const { return d->support_VK_KHR_shader_float16_int8; @@ -1585,6 +1636,11 @@ int GpuInfo::support_VK_KHR_shader_float_controls() const return d->support_VK_KHR_shader_float_controls; } +int GpuInfo::support_VK_KHR_shader_integer_dot_product() const +{ + return d->support_VK_KHR_shader_integer_dot_product; +} + int GpuInfo::support_VK_KHR_shader_non_semantic_info() const { return d->support_VK_KHR_shader_non_semantic_info; @@ -2748,10 +2804,14 @@ VulkanDevice::VulkanDevice(int device_index) enabledExtensions.push_back("VK_KHR_push_descriptor"); if (info.support_VK_KHR_sampler_ycbcr_conversion()) enabledExtensions.push_back("VK_KHR_sampler_ycbcr_conversion"); + if (info.support_VK_KHR_shader_bfloat16()) + enabledExtensions.push_back("VK_KHR_shader_bfloat16"); if (info.support_VK_KHR_shader_float16_int8()) enabledExtensions.push_back("VK_KHR_shader_float16_int8"); if (info.support_VK_KHR_shader_float_controls()) enabledExtensions.push_back("VK_KHR_shader_float_controls"); + if (info.support_VK_KHR_shader_integer_dot_product()) + enabledExtensions.push_back("VK_KHR_shader_integer_dot_product"); if (info.support_VK_KHR_shader_non_semantic_info()) enabledExtensions.push_back("VK_KHR_shader_non_semantic_info"); if (info.support_VK_KHR_shader_subgroup_extended_types()) diff --git a/src/gpu.h b/src/gpu.h index b9541c01f..e7c1d93e5 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -287,8 +287,9 @@ public: bool support_int8_uniform() const; bool support_int8_arithmetic() const; - // r16f format in storage image + // r16f and r8s format in storage image bool support_fp16_image() const; + bool support_int8_image() const; // ycbcr conversion feature bool support_ycbcr_conversion() const; @@ -319,8 +320,10 @@ public: int support_VK_KHR_portability_subset() const; int support_VK_KHR_push_descriptor() const; int support_VK_KHR_sampler_ycbcr_conversion() const; + int support_VK_KHR_shader_bfloat16() 
const; int support_VK_KHR_shader_float16_int8() const; int support_VK_KHR_shader_float_controls() const; + int support_VK_KHR_shader_integer_dot_product() const; int support_VK_KHR_shader_non_semantic_info() const; int support_VK_KHR_shader_subgroup_extended_types() const; int support_VK_KHR_shader_subgroup_rotate() const; @@ -350,12 +353,15 @@ public: const VkPhysicalDeviceCooperativeMatrixFeaturesKHR& queryCooperativeMatrixFeatures() const; const VkPhysicalDeviceCooperativeMatrixFeaturesNV& queryCooperativeMatrixFeaturesNV() const; const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT& querySubgroupSizeControlFeatures() const; + const VkPhysicalDeviceShaderBfloat16FeaturesKHR& queryShaderBfloat16Features() const; + const VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR& queryShaderIntegerDotProductFeatures() const; const VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR& queryShaderSubgroupRotateFeatures() const; const VkPhysicalDeviceShaderAtomicFloatFeaturesEXT& queryShaderAtomicFloatFeatures() const; const VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT& queryShaderAtomicFloat2Features() const; // extension properties const void* queryDeviceProperties() const; + const VkPhysicalDeviceShaderIntegerDotProductProperties& queryShaderIntegerDotProductProperties() const; const VkPhysicalDeviceSubgroupProperties& querySubgroupProperties() const; const VkPhysicalDeviceDriverPropertiesKHR& queryDriverProperties() const; const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& querySubgroupSizeControlProperties() const; diff --git a/src/vulkan_header_fix.h b/src/vulkan_header_fix.h index a55dc7acf..f967081c6 100644 --- a/src/vulkan_header_fix.h +++ b/src/vulkan_header_fix.h @@ -1384,4 +1384,69 @@ typedef struct VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT } VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT; #endif // VK_EXT_shader_atomic_float2 +#ifndef VK_KHR_shader_integer_dot_product +#define VK_KHR_shader_integer_dot_product 1 +#define 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES (VkStructureType)1000280000 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES (VkStructureType)1000280001 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES_KHR VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES_KHR VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES +typedef struct VkPhysicalDeviceShaderIntegerDotProductFeatures +{ + VkStructureType sType; + void* pNext; + VkBool32 shaderIntegerDotProduct; +} VkPhysicalDeviceShaderIntegerDotProductFeatures; +typedef struct VkPhysicalDeviceShaderIntegerDotProductProperties +{ + VkStructureType sType; + void* pNext; + VkBool32 integerDotProduct8BitUnsignedAccelerated; + VkBool32 integerDotProduct8BitSignedAccelerated; + VkBool32 integerDotProduct8BitMixedSignednessAccelerated; + VkBool32 integerDotProduct4x8BitPackedUnsignedAccelerated; + VkBool32 integerDotProduct4x8BitPackedSignedAccelerated; + VkBool32 integerDotProduct4x8BitPackedMixedSignednessAccelerated; + VkBool32 integerDotProduct16BitUnsignedAccelerated; + VkBool32 integerDotProduct16BitSignedAccelerated; + VkBool32 integerDotProduct16BitMixedSignednessAccelerated; + VkBool32 integerDotProduct32BitUnsignedAccelerated; + VkBool32 integerDotProduct32BitSignedAccelerated; + VkBool32 integerDotProduct32BitMixedSignednessAccelerated; + VkBool32 integerDotProduct64BitUnsignedAccelerated; + VkBool32 integerDotProduct64BitSignedAccelerated; + VkBool32 integerDotProduct64BitMixedSignednessAccelerated; + VkBool32 integerDotProductAccumulatingSaturating8BitUnsignedAccelerated; + VkBool32 integerDotProductAccumulatingSaturating8BitSignedAccelerated; + VkBool32 integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated; + VkBool32 integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated; + VkBool32 
integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated; + VkBool32 integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated; + VkBool32 integerDotProductAccumulatingSaturating16BitUnsignedAccelerated; + VkBool32 integerDotProductAccumulatingSaturating16BitSignedAccelerated; + VkBool32 integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated; + VkBool32 integerDotProductAccumulatingSaturating32BitUnsignedAccelerated; + VkBool32 integerDotProductAccumulatingSaturating32BitSignedAccelerated; + VkBool32 integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated; + VkBool32 integerDotProductAccumulatingSaturating64BitUnsignedAccelerated; + VkBool32 integerDotProductAccumulatingSaturating64BitSignedAccelerated; + VkBool32 integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated; +} VkPhysicalDeviceShaderIntegerDotProductProperties; +typedef VkPhysicalDeviceShaderIntegerDotProductFeatures VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR; +typedef VkPhysicalDeviceShaderIntegerDotProductProperties VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR; +#endif // VK_KHR_shader_integer_dot_product + +#ifndef VK_KHR_shader_bfloat16 +#define VK_KHR_shader_bfloat16 1 +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_BFLOAT16_FEATURES_KHR (VkStructureType)1000141000 +#define VK_COMPONENT_TYPE_BFLOAT16_KHR (VkComponentTypeKHR)1000141000 +typedef struct VkPhysicalDeviceShaderBfloat16FeaturesKHR +{ + VkStructureType sType; + void* pNext; + VkBool32 shaderBFloat16Type; + VkBool32 shaderBFloat16DotProduct; + VkBool32 shaderBFloat16CooperativeMatrix; +} VkPhysicalDeviceShaderBfloat16FeaturesKHR; +#endif // VK_KHR_shader_bfloat16 + #endif // NCNN_VULKAN_HEADER_FIX_H