Browse Source

discover VK_EXT_shader_float8 (#6120)

pull/6121/head
nihui GitHub 11 months ago
parent
commit
8998a13d06
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
3 changed files with 100 additions and 0 deletions
  1. +84
    -0
      src/gpu.cpp
  2. +2
    -0
      src/gpu.h
  3. +14
    -0
      src/vulkan_header_fix.h

+ 84
- 0
src/gpu.cpp View File

@@ -358,6 +358,7 @@ public:
int support_VK_EXT_queue_family_foreign;
int support_VK_EXT_shader_atomic_float;
int support_VK_EXT_shader_atomic_float2;
int support_VK_EXT_shader_float8;
int support_VK_EXT_subgroup_size_control;
int support_VK_AMD_device_coherent_memory;
#if __ANDROID_API__ >= 26
@@ -377,6 +378,7 @@ public:
VkPhysicalDeviceCooperativeMatrixFeaturesNV queryCooperativeMatrixFeaturesNV;
VkPhysicalDeviceCooperativeMatrix2FeaturesNV queryCooperativeMatrix2FeaturesNV;
VkPhysicalDeviceShaderBfloat16FeaturesKHR queryShaderBfloat16Features;
VkPhysicalDeviceShaderFloat8FeaturesEXT queryShaderFloat8Features;
VkPhysicalDeviceShaderFloatControls2FeaturesKHR queryShaderFloatControls2Features;
VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR queryShaderIntegerDotProductFeatures;
VkPhysicalDeviceSubgroupSizeControlFeaturesEXT querySubgroupSizeControlFeatures;
@@ -715,6 +717,7 @@ int GpuInfoPrivate::query_extensions()
support_VK_EXT_queue_family_foreign = 0;
support_VK_EXT_shader_atomic_float = 0;
support_VK_EXT_shader_atomic_float2 = 0;
support_VK_EXT_shader_float8 = 0;
support_VK_EXT_subgroup_size_control = 0;
support_VK_AMD_device_coherent_memory = 0;
#if __ANDROID_API__ >= 26
@@ -800,6 +803,8 @@ int GpuInfoPrivate::query_extensions()
support_VK_EXT_shader_atomic_float = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_EXT_shader_atomic_float2") == 0)
support_VK_EXT_shader_atomic_float2 = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_EXT_shader_float8") == 0)
support_VK_EXT_shader_float8 = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_EXT_subgroup_size_control") == 0)
support_VK_EXT_subgroup_size_control = exp.specVersion;
else if (strcmp(exp.extensionName, "VK_AMD_device_coherent_memory") == 0)
@@ -915,6 +920,16 @@ void GpuInfoPrivate::query_extension_features()
queryExtensionFeatures = &queryShaderBfloat16Features;
}

// query float8
// Reset the VK_EXT_shader_float8 feature struct and tag it with its sType.
// It is linked into the feature query chain only when query_extensions()
// recorded a non-zero spec version for the extension.
memset(&queryShaderFloat8Features, 0, sizeof(queryShaderFloat8Features));
queryShaderFloat8Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT8_FEATURES_EXT;
queryShaderFloat8Features.pNext = 0;
if (support_VK_EXT_shader_float8)
{
// Prepend: this struct points at the current chain head, then becomes the new head.
queryShaderFloat8Features.pNext = queryExtensionFeatures;
queryExtensionFeatures = &queryShaderFloat8Features;
}

// query float controls 2
memset(&queryShaderFloatControls2Features, 0, sizeof(queryShaderFloatControls2Features));
queryShaderFloatControls2Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT_CONTROLS_2_FEATURES_KHR;
@@ -1865,6 +1880,11 @@ int GpuInfo::support_VK_EXT_shader_atomic_float2() const
return d->support_VK_EXT_shader_atomic_float2;
}

// Reports whether VK_EXT_shader_float8 was discovered for this device.
// The value is the one stored by the private implementation: the extension's
// specVersion when it was enumerated, or 0 when it was not found.
int GpuInfo::support_VK_EXT_shader_float8() const
{
    const int float8_spec_version = d->support_VK_EXT_shader_float8;
    return float8_spec_version;
}

int GpuInfo::support_VK_EXT_subgroup_size_control() const
{
return d->support_VK_EXT_subgroup_size_control;
@@ -1952,6 +1972,11 @@ const VkPhysicalDeviceShaderBfloat16FeaturesKHR& GpuInfo::queryShaderBfloat16Fea
return d->queryShaderBfloat16Features;
}

// Gives read-only access to the VK_EXT_shader_float8 feature struct held by
// the private implementation. The returned reference aliases that member; no
// copy is made.
const VkPhysicalDeviceShaderFloat8FeaturesEXT& GpuInfo::queryShaderFloat8Features() const
{
    const VkPhysicalDeviceShaderFloat8FeaturesEXT& float8_features = d->queryShaderFloat8Features;
    return float8_features;
}

const VkPhysicalDeviceShaderFloatControls2FeaturesKHR& GpuInfo::queryShaderFloatControls2Features() const
{
return d->queryShaderFloatControls2Features;
@@ -3084,6 +3109,8 @@ VulkanDevice::VulkanDevice(int device_index)
enabledExtensions.push_back("VK_EXT_shader_atomic_float");
if (info.support_VK_EXT_shader_atomic_float2())
enabledExtensions.push_back("VK_EXT_shader_atomic_float2");
if (info.support_VK_EXT_shader_float8())
enabledExtensions.push_back("VK_EXT_shader_float8");
if (info.support_VK_EXT_subgroup_size_control())
enabledExtensions.push_back("VK_EXT_subgroup_size_control");
if (info.support_VK_AMD_device_coherent_memory())
@@ -5065,6 +5092,29 @@ int compile_spirv_module(const char* comp_data, int comp_data_size, const Option
DD_APPEND_FEATURE(subgroupSizeControl)
DD_APPEND_FEATURE(computeFullSubgroups)
}
if (info.support_VK_KHR_shader_bfloat16())
{
const VkPhysicalDeviceShaderBfloat16FeaturesKHR& features = info.queryShaderBfloat16Features();
DD_APPEND_FEATURE(shaderBFloat16Type)
DD_APPEND_FEATURE(shaderBFloat16DotProduct)
DD_APPEND_FEATURE(shaderBFloat16CooperativeMatrix)
}
// Export the float8 feature bits only when VK_EXT_shader_float8 was discovered on this device.
if (info.support_VK_EXT_shader_float8())
{
const VkPhysicalDeviceShaderFloat8FeaturesEXT& features = info.queryShaderFloat8Features();
// NOTE(review): DD_APPEND_FEATURE is defined outside this view; it presumably
// reads the named member from the local `features` — confirm before renaming it.
DD_APPEND_FEATURE(shaderFloat8)
DD_APPEND_FEATURE(shaderFloat8CooperativeMatrix)
}
if (info.support_VK_KHR_shader_float_controls2())
{
const VkPhysicalDeviceShaderFloatControls2FeaturesKHR& features = info.queryShaderFloatControls2Features();
DD_APPEND_FEATURE(shaderFloatControls2)
}
if (info.support_VK_KHR_shader_integer_dot_product())
{
const VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR& features = info.queryShaderIntegerDotProductFeatures();
DD_APPEND_FEATURE(shaderIntegerDotProduct)
}
if (info.support_VK_KHR_shader_subgroup_rotate())
{
const VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR& features = info.queryShaderSubgroupRotateFeatures();
@@ -5296,6 +5346,40 @@ int compile_spirv_module(const char* comp_data, int comp_data_size, const Option
device_defines.append("conformanceVersion_subminor", properties.conformanceVersion.subminor);
device_defines.append("conformanceVersion_patch", properties.conformanceVersion.patch);
}
if (info.support_VK_KHR_shader_integer_dot_product())
{
const VkPhysicalDeviceShaderIntegerDotProductProperties& properties = info.queryShaderIntegerDotProductProperties();
DD_APPEND_PROPERTY(integerDotProduct8BitUnsignedAccelerated)
DD_APPEND_PROPERTY(integerDotProduct8BitSignedAccelerated)
DD_APPEND_PROPERTY(integerDotProduct8BitMixedSignednessAccelerated)
DD_APPEND_PROPERTY(integerDotProduct4x8BitPackedUnsignedAccelerated)
DD_APPEND_PROPERTY(integerDotProduct4x8BitPackedSignedAccelerated)
DD_APPEND_PROPERTY(integerDotProduct4x8BitPackedMixedSignednessAccelerated)
DD_APPEND_PROPERTY(integerDotProduct16BitUnsignedAccelerated)
DD_APPEND_PROPERTY(integerDotProduct16BitSignedAccelerated)
DD_APPEND_PROPERTY(integerDotProduct16BitMixedSignednessAccelerated)
DD_APPEND_PROPERTY(integerDotProduct32BitUnsignedAccelerated)
DD_APPEND_PROPERTY(integerDotProduct32BitSignedAccelerated)
DD_APPEND_PROPERTY(integerDotProduct32BitMixedSignednessAccelerated)
DD_APPEND_PROPERTY(integerDotProduct64BitUnsignedAccelerated)
DD_APPEND_PROPERTY(integerDotProduct64BitSignedAccelerated)
DD_APPEND_PROPERTY(integerDotProduct64BitMixedSignednessAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating8BitUnsignedAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating8BitSignedAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating16BitUnsignedAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating16BitSignedAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating32BitUnsignedAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating32BitSignedAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating64BitUnsignedAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating64BitSignedAccelerated)
DD_APPEND_PROPERTY(integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated)
}
if (info.support_VK_EXT_subgroup_size_control())
{
const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& properties = info.querySubgroupSizeControlProperties();


+ 2
- 0
src/gpu.h View File

@@ -347,6 +347,7 @@ public:
int support_VK_EXT_queue_family_foreign() const;
int support_VK_EXT_shader_atomic_float() const;
int support_VK_EXT_shader_atomic_float2() const;
int support_VK_EXT_shader_float8() const;
int support_VK_EXT_subgroup_size_control() const;
int support_VK_AMD_device_coherent_memory() const;
#if __ANDROID_API__ >= 26
@@ -368,6 +369,7 @@ public:
const VkPhysicalDeviceCooperativeVectorFeaturesNV& queryCooperativeVectorFeaturesNV() const;
const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT& querySubgroupSizeControlFeatures() const;
const VkPhysicalDeviceShaderBfloat16FeaturesKHR& queryShaderBfloat16Features() const;
const VkPhysicalDeviceShaderFloat8FeaturesEXT& queryShaderFloat8Features() const;
const VkPhysicalDeviceShaderFloatControls2FeaturesKHR& queryShaderFloatControls2Features() const;
const VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR& queryShaderIntegerDotProductFeatures() const;
const VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR& queryShaderSubgroupRotateFeatures() const;


+ 14
- 0
src/vulkan_header_fix.h View File

@@ -1622,4 +1622,18 @@ typedef struct VkPhysicalDeviceCooperativeMatrix2PropertiesNV
typedef VkResult(VKAPI_PTR* PFN_vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixFlexibleDimensionsPropertiesNV* pProperties);
#endif // VK_NV_cooperative_matrix2

// Fallback declarations for VK_EXT_shader_float8. They are compiled only when
// the Vulkan headers in use do not already define the extension.
#ifndef VK_EXT_shader_float8
#define VK_EXT_shader_float8 1
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT8_FEATURES_EXT (VkStructureType)1000567000
// The float8 component types are NOT numbered in this extension's 1000567xxx
// range. The Vulkan registry places them in the VK_NV_cooperative_vector enum
// block (VK_COMPONENT_TYPE_FLOAT_E4M3_NV / _E5M2_NV are aliases of these), so
// the values must be 1000491002 / 1000491003 to match what drivers report.
#define VK_COMPONENT_TYPE_FLOAT8_E4M3_EXT (VkComponentTypeKHR)1000491002
#define VK_COMPONENT_TYPE_FLOAT8_E5M2_EXT (VkComponentTypeKHR)1000491003
// Feature struct chained via pNext when querying physical-device features.
typedef struct VkPhysicalDeviceShaderFloat8FeaturesEXT
{
    VkStructureType sType;
    void* pNext;
    VkBool32 shaderFloat8;                  // float8 types usable in shaders
    VkBool32 shaderFloat8CooperativeMatrix; // float8 usable as a cooperative matrix component type
} VkPhysicalDeviceShaderFloat8FeaturesEXT;
#endif // VK_EXT_shader_float8

#endif // NCNN_VULKAN_HEADER_FIX_H

Loading…
Cancel
Save