| @@ -689,6 +689,7 @@ ifneq ($(NO_SVE), 1) | |||
| DYNAMIC_CORE += NEOVERSEV1 | |||
| DYNAMIC_CORE += NEOVERSEN2 | |||
| DYNAMIC_CORE += ARMV8SVE | |||
| DYNAMIC_CORE += A64FX | |||
| endif | |||
| DYNAMIC_CORE += THUNDERX | |||
| DYNAMIC_CORE += THUNDERX2T99 | |||
| @@ -46,7 +46,7 @@ if (DYNAMIC_ARCH) | |||
| if (ARM64) | |||
| set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110) | |||
| if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99) | |||
| set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE) | |||
| set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX) | |||
| endif () | |||
| if (DYNAMIC_LIST) | |||
| set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST}) | |||
| @@ -1218,6 +1218,37 @@ endif () | |||
| set(ZGEMM_UNROLL_M 4) | |||
| set(ZGEMM_UNROLL_N 4) | |||
| set(SYMV_P 16) | |||
| elseif ("${TCORE}" STREQUAL "A64FX") | |||
| file(APPEND ${TARGET_CONF_TEMP} | |||
| "#define L1_CODE_SIZE\t65536\n" | |||
| "#define L1_CODE_LINESIZE\t256\n" | |||
| "#define L1_CODE_ASSOCIATIVE\t8\n" | |||
| "#define L1_DATA_SIZE\t32768\n" | |||
| "#define L1_DATA_LINESIZE\t256\n" | |||
| "#define L1_DATA_ASSOCIATIVE\t8\n" | |||
| "#define L2_SIZE\t8388608\n\n" | |||
| "#define L2_LINESIZE\t256\n" | |||
| "#define L2_ASSOCIATIVE\t8\n" | |||
| "#define L3_SIZE\t0\n\n" | |||
| "#define L3_LINESIZE\t0\n\n" | |||
| "#define L3_ASSOCIATIVE\t0\n\n" | |||
| "#define DTB_DEFAULT_ENTRIES\t64\n" | |||
| "#define DTB_SIZE\t4096\n" | |||
| "#define HAVE_VFPV4\n" | |||
| "#define HAVE_VFPV3\n" | |||
| "#define HAVE_VFP\n" | |||
| "#define HAVE_NEON\n" | |||
| "#define HAVE_SVE\n" | |||
| "#define ARMV8\n") | |||
| set(SGEMM_UNROLL_M 4) | |||
| set(SGEMM_UNROLL_N 8) | |||
| set(DGEMM_UNROLL_M 2) | |||
| set(DGEMM_UNROLL_N 8) | |||
| set(CGEMM_UNROLL_M 2) | |||
| set(CGEMM_UNROLL_N 4) | |||
| set(ZGEMM_UNROLL_M 2) | |||
| set(ZGEMM_UNROLL_N 4) | |||
| set(SYMV_P 16) | |||
| elseif ("${TCORE}" STREQUAL "P5600") | |||
| file(APPEND ${TARGET_CONF_TEMP} | |||
| "#define L2_SIZE 1048576\n" | |||
| @@ -310,6 +310,18 @@ if (${TARGET} STREQUAL NEOVERSEV1) | |||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve") | |||
| endif() | |||
| endif() | |||
| # For the A64FX target, append SVE-enabled -march and A64FX -mtune flags to KERNEL_DEFINITIONS. | |||
| if (${TARGET} STREQUAL A64FX) | |||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE) | |||
| # PGI additionally receives -Msve-intrinsics. | |||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.2-a+sve -mtune=a64fx") | |||
| else () | |||
| # Every other case is gated on the version printed by -dumpversion. Strip the trailing | |||
| # newline so it does not leak into the comparison or into the error text below. | |||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) | |||
| # Quoted so that an empty result reaches the FATAL_ERROR branch instead of producing a malformed if(). | |||
| # NOTE(review): a compiler that reports only its major version here would be rejected - confirm this is acceptable. | |||
| if ("${GCC_VERSION}" VERSION_GREATER 10.4 OR "${GCC_VERSION}" VERSION_EQUAL 10.4) | |||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve -mtune=a64fx") | |||
| else () | |||
| # Report both the compiler path and the detected version. | |||
| message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support A64FX.") | |||
| endif() | |||
| endif() | |||
| endif() | |||
| endif() | |||
| @@ -120,6 +120,11 @@ extern gotoblas_t gotoblas_CORTEXA55; | |||
| #else | |||
| #define gotoblas_CORTEXA55 gotoblas_ARMV8 | |||
| #endif | |||
| #ifdef DYN_A64FX | |||
| extern gotoblas_t gotoblas_A64FX; | |||
| #else | |||
| #define gotoblas_A64FX gotoblas_ARMV8 | |||
| #endif | |||
| #else | |||
| extern gotoblas_t gotoblas_CORTEXA53; | |||
| #define gotoblas_CORTEXA55 gotoblas_CORTEXA53 | |||
| @@ -136,10 +141,12 @@ extern gotoblas_t gotoblas_NEOVERSEN1; | |||
| extern gotoblas_t gotoblas_NEOVERSEV1; | |||
| extern gotoblas_t gotoblas_NEOVERSEN2; | |||
| extern gotoblas_t gotoblas_ARMV8SVE; | |||
| extern gotoblas_t gotoblas_A64FX; | |||
| #else | |||
| #define gotoblas_NEOVERSEV1 gotoblas_ARMV8 | |||
| #define gotoblas_NEOVERSEN2 gotoblas_ARMV8 | |||
| #define gotoblas_ARMV8SVE gotoblas_ARMV8 | |||
| #define gotoblas_A64FX gotoblas_ARMV8 | |||
| #endif | |||
| extern gotoblas_t gotoblas_THUNDERX3T110; | |||
| #endif | |||
| @@ -149,7 +156,7 @@ extern void openblas_warning(int verbose, const char * msg); | |||
| #define FALLBACK_VERBOSE 1 | |||
| #define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n" | |||
| #define NUM_CORETYPES 17 | |||
| #define NUM_CORETYPES 18 | |||
| /* | |||
| * In case asm/hwcap.h is outdated on the build system, make sure | |||
| @@ -184,6 +191,7 @@ static char *corename[] = { | |||
| "thunderx3t110", | |||
| "cortexa55", | |||
| "armv8sve", | |||
| "a64fx", | |||
| "unknown" | |||
| }; | |||
| @@ -205,6 +213,7 @@ char *gotoblas_corename(void) { | |||
| if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14]; | |||
| if (gotoblas == &gotoblas_CORTEXA55) return corename[15]; | |||
| if (gotoblas == &gotoblas_ARMV8SVE) return corename[16]; | |||
| if (gotoblas == &gotoblas_A64FX) return corename[17]; | |||
| return corename[NUM_CORETYPES]; | |||
| } | |||
| @@ -241,6 +250,7 @@ static gotoblas_t *force_coretype(char *coretype) { | |||
| case 14: return (&gotoblas_THUNDERX3T110); | |||
| case 15: return (&gotoblas_CORTEXA55); | |||
| case 16: return (&gotoblas_ARMV8SVE); | |||
| case 17: return (&gotoblas_A64FX); | |||
| } | |||
| snprintf(message, 128, "Core not found: %s\n", coretype); | |||
| openblas_warning(1, message); | |||
| @@ -346,6 +356,15 @@ static gotoblas_t *get_coretype(void) { | |||
| return &gotoblas_THUNDERX3T110; | |||
| } | |||
| break; | |||
| case 0x46: // Fujitsu | |||
| switch (part) | |||
| { | |||
| #ifndef NO_SVE | |||
| case 0x001: // A64FX | |||
| return &gotoblas_A64FX; | |||
| #endif | |||
| } | |||
| break; | |||
| case 0x48: // HiSilicon | |||
| switch (part) | |||
| { | |||