| @@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||
| endif | |||
| endif | |||
| ifeq ($(CORE), FT2000) | |||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
| ifneq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
| endif | |||
| endif | |||
| # Use a72 tunings because Neoverse-N1 is only available | |||
| # in GCC>=9 | |||
| ifeq ($(CORE), NEOVERSEN1) | |||
| @@ -229,6 +236,43 @@ endif | |||
| endif | |||
| endif | |||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
| ifeq ($(CORE), CORTEXX1) | |||
| CCOMMON_OPT += -march=armv9 -mtune=cortexx1 | |||
| ifneq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -march=armv9 -mtune=cortexx1 | |||
| endif | |||
| endif | |||
| endif | |||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
| ifeq ($(CORE), CORTEXX2) | |||
| CCOMMON_OPT += -march=armv9 -mtune=cortexx2 | |||
| ifneq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -march=armv9 -mtune=cortexx2 | |||
| endif | |||
| endif | |||
| endif | |||
| #ifeq (1, $(filter 1,$(ISCLANG))) | |||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
| ifeq ($(CORE), CORTEXA510) | |||
| CCOMMON_OPT += -march=armv8.4-a+sve | |||
| ifneq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -march=armv8.4-a+sve | |||
| endif | |||
| endif | |||
| endif | |||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
| ifeq ($(CORE), CORTEXA710) | |||
| CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortexa710 | |||
| ifneq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortexa710 | |||
| endif | |||
| endif | |||
| endif | |||
| endif | |||
| endif | |||
| @@ -92,6 +92,10 @@ CORTEXA53 | |||
| CORTEXA57 | |||
| CORTEXA72 | |||
| CORTEXA73 | |||
| CORTEXA510 | |||
| CORTEXA710 | |||
| CORTEXX1 | |||
| CORTEXX2 | |||
| NEOVERSEN1 | |||
| NEOVERSEV1 | |||
| NEOVERSEN2 | |||
| @@ -103,6 +107,9 @@ THUNDERX2T99 | |||
| TSV110 | |||
| THUNDERX3T110 | |||
| VORTEX | |||
| A64FX | |||
| ARMV8SVE | |||
| FT2000 | |||
| 9.System Z: | |||
| ZARCH_GENERIC | |||
| @@ -316,6 +316,7 @@ if ($architecture ne $hostarch) { | |||
| } | |||
| $cross = 1 if ($os ne $hostos); | |||
| $cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && ($ENV{TERMUX_APP_PID} != "")); | |||
| $openmp = "" if $ENV{USE_OPENMP} != 1; | |||
| @@ -45,6 +45,10 @@ size_t length64=sizeof(value64); | |||
| #define CPU_NEOVERSEN1 11 | |||
| #define CPU_NEOVERSEV1 16 | |||
| #define CPU_NEOVERSEN2 17 | |||
| #define CPU_CORTEXX1 18 | |||
| #define CPU_CORTEXX2 19 | |||
| #define CPU_CORTEXA510 20 | |||
| #define CPU_CORTEXA710 21 | |||
| // Qualcomm | |||
| #define CPU_FALKOR 6 | |||
| // Cavium | |||
| @@ -59,6 +63,8 @@ size_t length64=sizeof(value64); | |||
| #define CPU_VORTEX 13 | |||
| // Fujitsu | |||
| #define CPU_A64FX 15 | |||
| // Phytium | |||
| #define CPU_FT2000 22 | |||
| static char *cpuname[] = { | |||
| "UNKNOWN", | |||
| @@ -73,12 +79,17 @@ static char *cpuname[] = { | |||
| "TSV110", | |||
| "EMAG8180", | |||
| "NEOVERSEN1", | |||
| "NEOVERSEV1" | |||
| "NEOVERSEN2" | |||
| "THUNDERX3T110", | |||
| "VORTEX", | |||
| "CORTEXA55", | |||
| "A64FX" | |||
| "A64FX", | |||
| "NEOVERSEV1", | |||
| "NEOVERSEN2", | |||
| "CORTEXX1", | |||
| "CORTEXX2", | |||
| "CORTEXA510", | |||
| "CORTEXA710", | |||
| "FT2000" | |||
| }; | |||
| static char *cpuname_lower[] = { | |||
| @@ -94,12 +105,17 @@ static char *cpuname_lower[] = { | |||
| "tsv110", | |||
| "emag8180", | |||
| "neoversen1", | |||
| "neoversev1", | |||
| "neoversen2", | |||
| "thunderx3t110", | |||
| "vortex", | |||
| "cortexa55", | |||
| "a64fx" | |||
| "a64fx", | |||
| "neoversev1", | |||
| "neoversen2", | |||
| "cortexx1", | |||
| "cortexx2", | |||
| "cortexa510", | |||
| "cortexa710", | |||
| "ft2000" | |||
| }; | |||
| int get_feature(char *search) | |||
| @@ -182,6 +198,14 @@ int detect(void) | |||
| return CPU_NEOVERSEN2; | |||
| else if (strstr(cpu_part, "0xd05")) | |||
| return CPU_CORTEXA55; | |||
| else if (strstr(cpu_part, "0xd46")) | |||
| return CPU_CORTEXA510; | |||
| else if (strstr(cpu_part, "0xd47")) | |||
| return CPU_CORTEXA710; | |||
| else if (strstr(cpu_part, "0xd44")) | |||
| return CPU_CORTEXX1; | |||
| else if (strstr(cpu_part, "0xd4c")) | |||
| return CPU_CORTEXX2; | |||
| } | |||
| // Qualcomm | |||
| else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | |||
| @@ -202,6 +226,13 @@ int detect(void) | |||
| // Fujitsu | |||
| else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) | |||
| return CPU_A64FX; | |||
| // Apple | |||
| else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022")) | |||
| return CPU_VORTEX; | |||
| // Phytium | |||
| else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661") | |||
| || strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663"))) | |||
| return CPU_FT2000; | |||
| } | |||
| p = (char *) NULL ; | |||
| @@ -382,7 +413,24 @@ void get_cpuconfig(void) | |||
| printf("#define DTB_DEFAULT_ENTRIES 48\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| break; | |||
| case CPU_CORTEXA510: | |||
| case CPU_CORTEXA710: | |||
| case CPU_CORTEXX1: | |||
| case CPU_CORTEXX2: | |||
| printf("#define ARMV9\n"); | |||
| printf("#define %s\n", cpuname[d]); | |||
| printf("#define L1_CODE_SIZE 65536\n"); | |||
| printf("#define L1_CODE_LINESIZE 64\n"); | |||
| printf("#define L1_CODE_ASSOCIATIVE 4\n"); | |||
| printf("#define L1_DATA_SIZE 65536\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L1_DATA_ASSOCIATIVE 4\n"); | |||
| printf("#define L2_SIZE 1048576\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define L2_ASSOCIATIVE 8\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| break; | |||
| case CPU_FALKOR: | |||
| printf("#define FALKOR\n"); | |||
| printf("#define L1_CODE_SIZE 65536\n"); | |||
| @@ -469,9 +517,9 @@ void get_cpuconfig(void) | |||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||
| printf("#define DTB_SIZE 4096 \n"); | |||
| break; | |||
| #ifdef __APPLE__ | |||
| case CPU_VORTEX: | |||
| printf("#define VORTEX \n"); | |||
| #ifdef __APPLE__ | |||
| sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); | |||
| printf("#define L1_CODE_SIZE %lld \n",value64); | |||
| sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); | |||
| @@ -480,10 +528,10 @@ void get_cpuconfig(void) | |||
| printf("#define L1_DATA_SIZE %lld \n",value64); | |||
| sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); | |||
| printf("#define L2_SIZE %lld \n",value64); | |||
| #endif | |||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||
| printf("#define DTB_SIZE 4096 \n"); | |||
| break; | |||
| #endif | |||
| case CPU_A64FX: | |||
| printf("#define A64FX\n"); | |||
| printf("#define L1_CODE_SIZE 65535\n"); | |||
| @@ -494,6 +542,16 @@ void get_cpuconfig(void) | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| break; | |||
| case CPU_FT2000: | |||
| printf("#define FT2000\n"); | |||
| printf("#define L1_CODE_SIZE 32768\n"); | |||
| printf("#define L1_DATA_SIZE 32768\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L2_SIZE 33554432\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| break; | |||
| } | |||
| get_cpucount(); | |||
| } | |||
| @@ -1232,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa53" | |||
| #define CORENAME "CORTEXA53" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA57 | |||
| @@ -1248,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa57" | |||
| #define CORENAME "CORTEXA57" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA72 | |||
| @@ -1264,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa72" | |||
| #define CORENAME "CORTEXA72" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA73 | |||
| @@ -1280,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa73" | |||
| #define CORENAME "CORTEXA73" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXX1 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "CORTEXX1" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DCORTEXX1 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||
| #define LIBNAME "cortexx1" | |||
| #define CORENAME "CORTEXX1" | |||
| #endif | |||
| #ifdef FORCE_CORTEXX2 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "CORTEXX2" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DCORTEXX2 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||
| #define LIBNAME "cortexx2" | |||
| #define CORENAME "CORTEXX2" | |||
| #endif | |||
| #ifdef FORCE_CORTEXA510 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "CORTEXA510" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DCORTEXA510 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||
| #define LIBNAME "cortexa510" | |||
| #define CORENAME "CORTEXA510" | |||
| #endif | |||
| #ifdef FORCE_CORTEXA710 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "CORTEXA710" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DCORTEXA710 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||
| #define LIBNAME "cortexa710" | |||
| #define CORENAME "CORTEXA710" | |||
| #endif | |||
| #ifdef FORCE_NEOVERSEN1 | |||
| @@ -1297,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-march=armv8.2-a -mtune=neoverse-n1" | |||
| #define LIBNAME "neoversen1" | |||
| #define CORENAME "NEOVERSEN1" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_NEOVERSEV1 | |||
| @@ -1314,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-march=armv8.4-a -mtune=neoverse-v1" | |||
| #define LIBNAME "neoversev1" | |||
| #define CORENAME "NEOVERSEV1" | |||
| #else | |||
| #endif | |||
| @@ -1332,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-march=armv8.5-a -mtune=neoverse-n2" | |||
| #define LIBNAME "neoversen2" | |||
| #define CORENAME "NEOVERSEN2" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA55 | |||
| @@ -1348,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa55" | |||
| #define CORENAME "CORTEXA55" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_FALKOR | |||
| @@ -1364,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "falkor" | |||
| #define CORENAME "FALKOR" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_THUNDERX | |||
| @@ -1379,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "thunderx" | |||
| #define CORENAME "THUNDERX" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_THUNDERX2T99 | |||
| @@ -1397,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "thunderx2t99" | |||
| #define CORENAME "THUNDERX2T99" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_TSV110 | |||
| @@ -1413,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "tsv110" | |||
| #define CORENAME "TSV110" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_EMAG8180 | |||
| @@ -1448,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "thunderx3t110" | |||
| #define CORENAME "THUNDERX3T110" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_VORTEX | |||
| @@ -1480,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8" | |||
| #define LIBNAME "a64fx" | |||
| #define CORENAME "A64FX" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_FT2000 | |||
| #define ARMV8 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "FT2000" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DFT2000 " \ | |||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ | |||
| "-DL2_SIZE=33554426-DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "ft2000" | |||
| #define CORENAME "FT2000" | |||
| #endif | |||
| #ifdef FORCE_ZARCH_GENERIC | |||
| @@ -3130,7 +3130,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #if defined(CORTEXA57) || \ | |||
| defined(CORTEXA72) || defined(CORTEXA73) || \ | |||
| defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) | |||
| defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000) | |||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| @@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d | |||
| #define CGEMM_DEFAULT_R 4096 | |||
| #define ZGEMM_DEFAULT_R 4096 | |||
| #elif defined(ARMV8SVE) || defined(A64FX) | |||
| #elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510) | |||
| /* When all BLAS3 routines are implemeted with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". | |||
| Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions seperated. */ | |||