From ce79fe12fdacdfd5d48c4a61a08f86aa6170eae9 Mon Sep 17 00:00:00 2001 From: Mark Ryan Date: Wed, 27 Aug 2025 10:15:09 +0000 Subject: [PATCH] disable fp16 flags on RISC-V unless BUILD_HFLOAT16=1 The compiler options that enable 16-bit floating-point instructions should not be enabled by default when building the RISCV64_ZVL128B and RISCV64_ZVL256B targets. The zfh and zvfh extensions are not part of the 'V' extension and are not required by any of the RVA profiles. There's no guarantee that kernels built with zfh and zvfh will work correctly on fully compliant RVA23U64 devices. To fix the issue, we only build the RISCV64_ZVL128B and RISCV64_ZVL256B kernels with the half-float flags if BUILD_HFLOAT16=1. We also update the RISC-V dynamic detection code to disable the RISCV64_ZVL128B and RISCV64_ZVL256B kernels at runtime if we've built with DYNAMIC_ARCH=1 and BUILD_HFLOAT16=1 and are running on a device that lacks either Zfh or Zvfh. Fixes: https://github.com/OpenMathLib/OpenBLAS/issues/5428 --- Makefile.prebuild | 4 ++-- Makefile.riscv64 | 12 +++++++++++- driver/others/Makefile | 2 +- driver/others/dynamic_riscv64.c | 14 +++++++++++++- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/Makefile.prebuild b/Makefile.prebuild index b6c8d552f..b7d695a75 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -64,11 +64,11 @@ TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d endif ifeq ($(TARGET), RISCV64_ZVL256B) -TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d +TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d endif ifeq ($(TARGET), RISCV64_ZVL128B) -TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d +TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d endif ifeq ($(TARGET), RISCV64_GENERIC) diff --git a/Makefile.riscv64 b/Makefile.riscv64 index 8fe734186..cbabcadab 100644 --- a/Makefile.riscv64 +++ b/Makefile.riscv64 @@ -7,12 +7,22 @@ CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d FCOMMON_OPT += 
-march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static endif ifeq ($(CORE), RISCV64_ZVL256B) +ifeq ($(BUILD_HFLOAT16), 1) CCOMMON_OPT += -march=rv64imafdcv_zvl256b_zvfh_zfh -mabi=lp64d FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d +else +CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d +FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d +endif endif ifeq ($(CORE), RISCV64_ZVL128B) -CCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d +ifeq ($(BUILD_HFLOAT16), 1) +CCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d +else +CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d +FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d +endif endif ifeq ($(CORE), RISCV64_GENERIC) CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d diff --git a/driver/others/Makefile b/driver/others/Makefile index 6a53638bc..719d617c4 100644 --- a/driver/others/Makefile +++ b/driver/others/Makefile @@ -218,7 +218,7 @@ mulx.$(SUFFIX) : $(ARCH)/mulx.c $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $(@F) detect_riscv64.$(SUFFIX): detect_riscv64.c - $(CC) $(CFLAGS) -c -march=rv64imafdcv_zvfh_zfh $< -o $(@F) + $(CC) $(CFLAGS) -c -march=rv64imafdcv $< -o $(@F) xerbla.$(PSUFFIX) : xerbla.c $(CC) $(PFLAGS) -c $< -o $(@F) diff --git a/driver/others/dynamic_riscv64.c b/driver/others/dynamic_riscv64.c index 78e3bb67a..35ddb277c 100644 --- a/driver/others/dynamic_riscv64.c +++ b/driver/others/dynamic_riscv64.c @@ -97,6 +97,8 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 #define RISCV_HWPROBE_IMA_V (1 << 2) +#define RISCV_HWPROBE_EXT_ZFH (1 << 27) +#define RISCV_HWPROBE_EXT_ZVFH (1 << 30) #ifndef NR_riscv_hwprobe #ifndef NR_arch_specific_syscall @@ -147,6 +149,7 @@ char* gotoblas_corename(void) { } static gotoblas_t* get_coretype(void) { + uint64_t vector_mask; unsigned vlenb = 0; #if !defined(OS_LINUX) @@ -165,14 +168,23 @@ static gotoblas_t* get_coretype(void) { }; int ret = syscall(NR_riscv_hwprobe, pairs, 1, 0, NULL, 0); if (ret == 0) { - if 
(!(pairs[0].value & RISCV_HWPROBE_IMA_V)) +#if defined(BUILD_HFLOAT16) + vector_mask = (RISCV_HWPROBE_IMA_V | RISCV_HWPROBE_EXT_ZFH | RISCV_HWPROBE_EXT_ZVFH); +#else + vector_mask = RISCV_HWPROBE_IMA_V; +#endif + if ((pairs[0].value & vector_mask) != vector_mask) return NULL; } else { +#if defined(BUILD_HFLOAT16) + return NULL; +#else if (!(getauxval(AT_HWCAP) & DETECT_RISCV64_HWCAP_ISA_V)) return NULL; if (!detect_riscv64_rvv100()) return NULL; +#endif } /*