diff --git a/src/gpu.cpp b/src/gpu.cpp index 62247af86..f3bb56d03 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -3759,10 +3759,11 @@ VkShaderModule VulkanDevice::compile_shader_module(const uint32_t* spv_data, siz if (fast_math_flag != 0) { std::vector buffer; - inject_fast_math(spv_data_modified, spv_data_size_modified, buffer,fast_math_flag); + inject_fast_math(spv_data_modified, spv_data_size_modified, buffer, fast_math_flag); shader_module = compile_shader_module(buffer.data(), buffer.size() * sizeof(uint32_t)); - } else + } + else { shader_module = compile_shader_module(spv_data_modified, spv_data_size_modified); } diff --git a/src/option.h b/src/option.h index 2abdff852..40f421134 100644 --- a/src/option.h +++ b/src/option.h @@ -61,14 +61,14 @@ public: { // Base VK_FAST_MATH_FLAG_DISABLE = 0x0, - VK_FAST_MATH_FLAG_NotNaN = 0x1, // Assume parameters and result are not NaN. If this assumption does not hold then the operation returns an undefined value. - VK_FAST_MATH_FLAG_NotInf = 0x2, // Assume parameters and result are not +/- Inf. If this assumption does not hold then the operation returns an undefined value. - VK_FAST_MATH_FLAG_NSZ = 0x4, // Treat the sign of a zero parameter or result as insignificant. + VK_FAST_MATH_FLAG_NotNaN = 0x1, // Assume parameters and result are not NaN. If this assumption does not hold then the operation returns an undefined value. + VK_FAST_MATH_FLAG_NotInf = 0x2, // Assume parameters and result are not +/- Inf. If this assumption does not hold then the operation returns an undefined value. + VK_FAST_MATH_FLAG_NSZ = 0x4, // Treat the sign of a zero parameter or result as insignificant. VK_FAST_MATH_FLAG_AllowRecip = 0x8, // Allow the usage of reciprocal rather than perform a division. - VK_FAST_MATH_FLAG_Fast = 0x10, // Allow algebraic transformations according to real-number associative and distributive algebra. This flag implies above; + VK_FAST_MATH_FLAG_Fast = 0x10, // Allow algebraic transformations according to real-number associative and distributive algebra. This flag implies above; // FloatControls2 - VK_FAST_MATH_FLAG_AllowContract = 0x10000, // Allows a floating-point operation to be contracted with any operation(s) producing its operands. Rounding steps may be eliminated or may preserve higher bit-depth than the specified types. The instructions producing the operands do not need to be decorated to allow this transformation. - VK_FAST_MATH_FLAG_AllowReassoc = 0x20000, // Allows a floating-point operation to be reordered with any operation(s) producing its operands according to real-number associativity rules. The instructions producing the operands do not need to be decorated to allow this transformation. + VK_FAST_MATH_FLAG_AllowContract = 0x10000, // Allows a floating-point operation to be contracted with any operation(s) producing its operands. Rounding steps may be eliminated or may preserve higher bit-depth than the specified types. The instructions producing the operands do not need to be decorated to allow this transformation. + VK_FAST_MATH_FLAG_AllowReassoc = 0x20000, // Allows a floating-point operation to be reordered with any operation(s) producing its operands according to real-number associativity rules. The instructions producing the operands do not need to be decorated to allow this transformation. VK_FAST_MATH_FLAG_AllowTransform = 0x40000, // Allows a floating-point operation to be transformed with any operation(s) producing its operands according to real-number rules. This is a superset of AllowContract and AllowReassoc and those bits must be set whenever this bit is set. The instructions producing the operands do not need to be decorated to allow this transformation, but note that non-trivial transformations may require multiple instructions to be decorated. }; diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 99b87a65a..cf59a6ecb 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -223,7 +223,7 @@ int Pipeline::create(const uint32_t* spv_data, size_t spv_data_size, const std:: // get from pipeline cache return pipeline_cache->get_pipeline(spv_data, spv_data_size, specializations, d->local_size_x, d->local_size_y, d->local_size_z, d->subgroup_size, &d->shader_module, &d->descriptorset_layout, &d->pipeline_layout, &d->pipeline, &d->descriptor_update_template, - d->shader_info,fast_math_flag); + d->shader_info, fast_math_flag); } int Pipeline::create(int shader_type_index, const Option& opt, const std::vector& specializations) diff --git a/src/pipelinecache.cpp b/src/pipelinecache.cpp index 12e4f2da5..59b4e12d6 100644 --- a/src/pipelinecache.cpp +++ b/src/pipelinecache.cpp @@ -119,7 +119,7 @@ public: }; PipelineCachePrivate::pipeline_cache_digest::pipeline_cache_digest(const uint32_t* spv_data, size_t spv_data_size, const std::vector& specializations, - uint32_t _local_size_x, uint32_t _local_size_y, uint32_t _local_size_z, uint32_t _subgroup_size, uint32_t _fast_math_flag) + uint32_t _local_size_x, uint32_t _local_size_y, uint32_t _local_size_z, uint32_t _subgroup_size, uint32_t _fast_math_flag) { spv_data_murmur3 = murmur3_32(spv_data, spv_data_size / 4); @@ -139,7 +139,7 @@ PipelineCachePrivate::pipeline_cache_digest::pipeline_cache_digest(const uint32_ } PipelineCachePrivate::pipeline_cache_digest::pipeline_cache_digest(int _shader_type_index, const Option& opt, const std::vector& specializations, - uint32_t _local_size_x, uint32_t _local_size_y, uint32_t _local_size_z, uint32_t _subgroup_size) + uint32_t _local_size_x, uint32_t _local_size_y, uint32_t _local_size_z, uint32_t _subgroup_size) { shader_type_index = _shader_type_index; diff --git a/tests/test_fast_math.cpp b/tests/test_fast_math.cpp index a224287da..481b9638e 100644 --- a/tests/test_fast_math.cpp +++ b/tests/test_fast_math.cpp @@ -108,7 +108,6 @@ static int test_vulkan_fast_math() net_fast_math.load_model(dr); printf("Fast math net loaded successfully.\n"); - // ================================================== // 3. Warm-up Run // ================================================== @@ -128,7 +127,6 @@ static int test_vulkan_fast_math() } printf("Warm-up complete.\n"); - // ================================================== // 4. Benchmark Performance // ================================================== @@ -208,7 +206,6 @@ int main(int argc, char** argv) device_index = atoi(argv[1]); } - int gpu_count = ncnn::get_gpu_count(); if (device_index < 0 || device_index >= gpu_count) {