The compiler options that enable 16-bit floating-point instructions should not be enabled by default when building the RISCV64_ZVL128B and RISCV64_ZVL256B targets. The zfh and zvfh extensions are not part of the 'V' extension and are not required by any of the RVA profiles. There's no guarantee that kernels built with zfh and zvfh will work correctly on fully compliant RVA23U64 devices. To fix the issue, we only build the RISCV64_ZVL128B and RISCV64_ZVL256B kernels with the half-float flags if BUILD_HFLOAT16=1. We also update the RISC-V dynamic detection code to disable the RISCV64_ZVL128B and RISCV64_ZVL256B kernels at runtime if we've built with DYNAMIC_ARCH=1 and BUILD_HFLOAT16=1 and are running on a device that does not support both Zfh and Zvfh. Fixes: https://github.com/OpenMathLib/OpenBLAS/issues/5428
pull/5431/head
| @@ -64,11 +64,11 @@ TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d | |||||
| endif | endif | ||||
| ifeq ($(TARGET), RISCV64_ZVL256B) | ifeq ($(TARGET), RISCV64_ZVL256B) | ||||
| TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||||
| TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d | |||||
| endif | endif | ||||
| ifeq ($(TARGET), RISCV64_ZVL128B) | ifeq ($(TARGET), RISCV64_ZVL128B) | ||||
| TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||||
| TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d | |||||
| endif | endif | ||||
| ifeq ($(TARGET), RISCV64_GENERIC) | ifeq ($(TARGET), RISCV64_GENERIC) | ||||
| @@ -7,12 +7,22 @@ CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d | |||||
| FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static | FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static | ||||
| endif | endif | ||||
| ifeq ($(CORE), RISCV64_ZVL256B) | ifeq ($(CORE), RISCV64_ZVL256B) | ||||
| ifeq ($(BUILD_HFLOAT16), 1) | |||||
| CCOMMON_OPT += -march=rv64imafdcv_zvl256b_zvfh_zfh -mabi=lp64d | CCOMMON_OPT += -march=rv64imafdcv_zvl256b_zvfh_zfh -mabi=lp64d | ||||
| FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | ||||
| else | |||||
| CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d | |||||
| FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d | |||||
| endif | |||||
| endif | endif | ||||
| ifeq ($(CORE), RISCV64_ZVL128B) | ifeq ($(CORE), RISCV64_ZVL128B) | ||||
| CCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||||
| ifeq ($(BUILD_HFLOAT16), 1) | |||||
| CCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||||
| FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | ||||
| else | |||||
| CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d | |||||
| FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d | |||||
| endif | |||||
| endif | endif | ||||
| ifeq ($(CORE), RISCV64_GENERIC) | ifeq ($(CORE), RISCV64_GENERIC) | ||||
| CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d | CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d | ||||
| @@ -218,7 +218,7 @@ mulx.$(SUFFIX) : $(ARCH)/mulx.c | |||||
| $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $(@F) | $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $(@F) | ||||
| detect_riscv64.$(SUFFIX): detect_riscv64.c | detect_riscv64.$(SUFFIX): detect_riscv64.c | ||||
| $(CC) $(CFLAGS) -c -march=rv64imafdcv_zvfh_zfh $< -o $(@F) | |||||
| $(CC) $(CFLAGS) -c -march=rv64imafdcv $< -o $(@F) | |||||
| xerbla.$(PSUFFIX) : xerbla.c | xerbla.$(PSUFFIX) : xerbla.c | ||||
| $(CC) $(PFLAGS) -c $< -o $(@F) | $(CC) $(PFLAGS) -c $< -o $(@F) | ||||
| @@ -97,6 +97,8 @@ struct riscv_hwprobe { | |||||
| #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 | #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 | ||||
| #define RISCV_HWPROBE_IMA_V (1 << 2) | #define RISCV_HWPROBE_IMA_V (1 << 2) | ||||
| #define RISCV_HWPROBE_EXT_ZFH (1 << 27) | |||||
| #define RISCV_HWPROBE_EXT_ZVFH (1 << 30) | |||||
| #ifndef NR_riscv_hwprobe | #ifndef NR_riscv_hwprobe | ||||
| #ifndef NR_arch_specific_syscall | #ifndef NR_arch_specific_syscall | ||||
| @@ -147,6 +149,7 @@ char* gotoblas_corename(void) { | |||||
| } | } | ||||
| static gotoblas_t* get_coretype(void) { | static gotoblas_t* get_coretype(void) { | ||||
| uint64_t vector_mask; | |||||
| unsigned vlenb = 0; | unsigned vlenb = 0; | ||||
| #if !defined(OS_LINUX) | #if !defined(OS_LINUX) | ||||
| @@ -165,14 +168,23 @@ static gotoblas_t* get_coretype(void) { | |||||
| }; | }; | ||||
| int ret = syscall(NR_riscv_hwprobe, pairs, 1, 0, NULL, 0); | int ret = syscall(NR_riscv_hwprobe, pairs, 1, 0, NULL, 0); | ||||
| if (ret == 0) { | if (ret == 0) { | ||||
| if (!(pairs[0].value & RISCV_HWPROBE_IMA_V)) | |||||
| #if defined(BUILD_HFLOAT16) | |||||
| vector_mask = (RISCV_HWPROBE_IMA_V | RISCV_HWPROBE_EXT_ZFH | RISCV_HWPROBE_EXT_ZVFH); | |||||
| #else | |||||
| vector_mask = RISCV_HWPROBE_IMA_V; | |||||
| #endif | |||||
| if ((pairs[0].value & vector_mask) != vector_mask) | |||||
| return NULL; | return NULL; | ||||
| } else { | } else { | ||||
| #if defined(BUILD_HFLOAT16) | |||||
| return NULL; | |||||
| #else | |||||
| if (!(getauxval(AT_HWCAP) & DETECT_RISCV64_HWCAP_ISA_V)) | if (!(getauxval(AT_HWCAP) & DETECT_RISCV64_HWCAP_ISA_V)) | ||||
| return NULL; | return NULL; | ||||
| if (!detect_riscv64_rvv100()) | if (!detect_riscv64_rvv100()) | ||||
| return NULL; | return NULL; | ||||
| #endif | |||||
| } | } | ||||
| /* | /* | ||||