Simplifying ARMv8 build parameterstags/v0.3.4
| @@ -4,22 +4,37 @@ CCOMMON_OPT += -march=armv8-a | |||
| FCOMMON_OPT += -march=armv8-a | |||
| endif | |||
| ifeq ($(CORE), CORTEXA53) | |||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 | |||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 | |||
| endif | |||
| ifeq ($(CORE), CORTEXA57) | |||
| CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | |||
| FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | |||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57 | |||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57 | |||
| endif | |||
| ifeq ($(CORE), CORTEXA72) | |||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
| endif | |||
| ifeq ($(CORE), VULCAN) | |||
| CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan | |||
| FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan | |||
| ifeq ($(CORE), CORTEXA73) | |||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||
| endif | |||
| ifeq ($(CORE), THUNDERX) | |||
| CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx | |||
| FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx | |||
| CCOMMON_OPT += -march=armv8-a -mtune=thunderx | |||
| FCOMMON_OPT += -march=armv8-a -mtune=thunderx | |||
| endif | |||
| ifeq ($(CORE), FALKOR) | |||
| CCOMMON_OPT += -march=armv8.1-a -mtune=falkor | |||
| FCOMMON_OPT += -march=armv8.1-a -mtune=falkor | |||
| endif | |||
| ifeq ($(CORE), THUNDERX2T99) | |||
| CCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99 | |||
| FCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99 | |||
| CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
| FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
| endif | |||
| @@ -83,8 +83,11 @@ ARMV5 | |||
| 8.ARM 64-bit CPU: | |||
| ARMV8 | |||
| CORTEXA53 | |||
| CORTEXA57 | |||
| VULCAN | |||
| CORTEXA72 | |||
| CORTEXA73 | |||
| FALKOR | |||
| THUNDERX | |||
| THUNDERX2T99 | |||
| @@ -29,25 +29,37 @@ | |||
| #define CPU_UNKNOWN 0 | |||
| #define CPU_ARMV8 1 | |||
| #define CPU_CORTEXA57 2 | |||
| #define CPU_VULCAN 3 | |||
| #define CPU_THUNDERX 4 | |||
| #define CPU_THUNDERX2T99 5 | |||
| // Arm | |||
| #define CPU_CORTEXA53 2 | |||
| #define CPU_CORTEXA57 3 | |||
| #define CPU_CORTEXA72 4 | |||
| #define CPU_CORTEXA73 5 | |||
| // Qualcomm | |||
| #define CPU_FALKOR 6 | |||
| // Cavium | |||
| #define CPU_THUNDERX 7 | |||
| #define CPU_THUNDERX2T99 8 | |||
| static char *cpuname[] = { | |||
| "UNKNOWN", | |||
| "ARMV8" , | |||
| "CORTEXA53", | |||
| "CORTEXA57", | |||
| "VULCAN", | |||
| "CORTEXA72", | |||
| "CORTEXA73", | |||
| "FALKOR", | |||
| "THUNDERX", | |||
| "THUNDERX2T99" | |||
| }; | |||
| static char *cpuname_lower[] = { | |||
| "unknown", | |||
| "armv8" , | |||
| "armv8", | |||
| "cortexa53", | |||
| "cortexa57", | |||
| "vulcan", | |||
| "cortexa72", | |||
| "cortexa73", | |||
| "falkor", | |||
| "thunderx", | |||
| "thunderx2t99" | |||
| }; | |||
| @@ -114,14 +126,24 @@ int detect(void) | |||
| fclose(infile); | |||
| if(cpu_part != NULL && cpu_implementer != NULL) { | |||
| if (strstr(cpu_implementer, "0x41") && | |||
| (strstr(cpu_part, "0xd07") || strstr(cpu_part,"0xd08"))) | |||
| return CPU_CORTEXA57; //or compatible, ex. A72 | |||
| else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42")) | |||
| return CPU_VULCAN; | |||
| else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43")) | |||
| // Arm | |||
| if (strstr(cpu_implementer, "0x41")) { | |||
| if (strstr(cpu_part, "0xd03")) | |||
| return CPU_CORTEXA53; | |||
| else if (strstr(cpu_part, "0xd07")) | |||
| return CPU_CORTEXA57; | |||
| else if (strstr(cpu_part, "0xd08")) | |||
| return CPU_CORTEXA72; | |||
| else if (strstr(cpu_part, "0xd09")) | |||
| return CPU_CORTEXA73; | |||
| } | |||
| // Qualcomm | |||
| else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | |||
| return CPU_FALKOR; | |||
| // Cavium | |||
| else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0a1")) | |||
| return CPU_THUNDERX; | |||
| else if (strstr(cpu_part, "0x0af") && strstr(cpu_implementer, "0x43")) | |||
| else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af")) | |||
| return CPU_THUNDERX2T99; | |||
| } | |||
| @@ -180,62 +202,62 @@ void get_subdirname(void) | |||
| void get_cpuconfig(void) | |||
| { | |||
| // All arches should define ARMv8 | |||
| printf("#define ARMV8\n"); | |||
| printf("#define HAVE_NEON\n"); // This shouldn't be necessary | |||
| printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary | |||
| int d = detect(); | |||
| switch (d) | |||
| { | |||
| case CPU_CORTEXA53: | |||
| printf("#define %s\n", cpuname[d]); | |||
| // Fall-through | |||
| case CPU_ARMV8: | |||
| printf("#define ARMV8\n"); | |||
| printf("#define L1_DATA_SIZE 32768\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L2_SIZE 262144\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||
| break; | |||
| case CPU_VULCAN: | |||
| printf("#define VULCAN \n"); | |||
| printf("#define HAVE_VFP \n"); | |||
| printf("#define HAVE_VFPV3 \n"); | |||
| printf("#define HAVE_NEON \n"); | |||
| printf("#define HAVE_VFPV4 \n"); | |||
| printf("#define L1_CODE_SIZE 32768 \n"); | |||
| printf("#define L1_CODE_LINESIZE 64 \n"); | |||
| printf("#define L1_CODE_ASSOCIATIVE 8 \n"); | |||
| printf("#define L1_DATA_SIZE 32768 \n"); | |||
| printf("#define L1_DATA_LINESIZE 64 \n"); | |||
| printf("#define L1_DATA_ASSOCIATIVE 8 \n"); | |||
| printf("#define L2_SIZE 262144 \n"); | |||
| printf("#define L2_LINESIZE 64 \n"); | |||
| printf("#define L2_ASSOCIATIVE 8 \n"); | |||
| printf("#define L3_SIZE 33554432 \n"); | |||
| printf("#define L3_LINESIZE 64 \n"); | |||
| printf("#define L3_ASSOCIATIVE 32 \n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||
| printf("#define DTB_SIZE 4096 \n"); | |||
| // Minimum parameters for ARMv8 (based on A53) | |||
| printf("#define L1_DATA_SIZE 32768\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L2_SIZE 262144\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||
| break; | |||
| case CPU_CORTEXA57: | |||
| printf("#define CORTEXA57\n"); | |||
| printf("#define HAVE_VFP\n"); | |||
| printf("#define HAVE_VFPV3\n"); | |||
| printf("#define HAVE_NEON\n"); | |||
| printf("#define HAVE_VFPV4\n"); | |||
| case CPU_CORTEXA72: | |||
| case CPU_CORTEXA73: | |||
| // Common minimum settings for these Arm cores | |||
| // Can change a lot, but we need to be conservative | |||
| // TODO: detect info from /sys if possible | |||
| printf("#define %s\n", cpuname[d]); | |||
| printf("#define L1_CODE_SIZE 49152\n"); | |||
| printf("#define L1_CODE_LINESIZE 64\n"); | |||
| printf("#define L1_CODE_ASSOCIATIVE 3\n"); | |||
| printf("#define L1_DATA_SIZE 32768\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L1_DATA_ASSOCIATIVE 2\n"); | |||
| printf("#define L2_SIZE 2097152\n"); | |||
| printf("#define L2_SIZE 524288\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| break; | |||
| case CPU_FALKOR: | |||
| printf("#define FALKOR\n"); | |||
| printf("#define L1_CODE_SIZE 65536\n"); | |||
| printf("#define L1_CODE_LINESIZE 64\n"); | |||
| printf("#define L1_DATA_SIZE 32768\n"); | |||
| printf("#define L1_DATA_LINESIZE 128\n"); | |||
| printf("#define L2_SIZE 524288\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||
| break; | |||
| case CPU_THUNDERX: | |||
| printf("#define THUNDERX\n"); | |||
| printf("#define L1_DATA_SIZE 32768\n"); | |||
| @@ -249,10 +271,6 @@ void get_cpuconfig(void) | |||
| case CPU_THUNDERX2T99: | |||
| printf("#define VULCAN \n"); | |||
| printf("#define HAVE_VFP \n"); | |||
| printf("#define HAVE_VFPV3 \n"); | |||
| printf("#define HAVE_NEON \n"); | |||
| printf("#define HAVE_VFPV4 \n"); | |||
| printf("#define L1_CODE_SIZE 32768 \n"); | |||
| printf("#define L1_CODE_LINESIZE 64 \n"); | |||
| printf("#define L1_CODE_ASSOCIATIVE 8 \n"); | |||
| @@ -927,11 +927,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define ARCHCONFIG "-DARMV8 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "armv8" | |||
| #define CORENAME "ARMV8" | |||
| #endif | |||
| #ifdef FORCE_CORTEXA53 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "CORTEXA53" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DCORTEXA53 " \ | |||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa53" | |||
| #define CORENAME "CORTEXA53" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA57 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| @@ -942,26 +959,57 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa57" | |||
| #define CORENAME "CORTEXA57" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_VULCAN | |||
| #ifdef FORCE_CORTEXA72 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "VULCAN" | |||
| #define SUBARCHITECTURE "CORTEXA72" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DVULCAN " \ | |||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||
| "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \ | |||
| #define ARCHCONFIG "-DCORTEXA72 " \ | |||
| "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa72" | |||
| #define CORENAME "CORTEXA72" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA73 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "CORTEXA73" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DCORTEXA73 " \ | |||
| "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa73" | |||
| #define CORENAME "CORTEXA73" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_FALKOR | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "FALKOR" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DFALKOR " \ | |||
| "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" | |||
| #define LIBNAME "vulcan" | |||
| #define CORENAME "VULCAN" | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "falkor" | |||
| #define CORENAME "FALKOR" | |||
| #else | |||
| #endif | |||
| @@ -973,13 +1021,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define ARCHCONFIG "-DTHUNDERX " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ | |||
| "-DL2_SIZE=16777216 -DL2_LINESIZE=128 -DL2_ASSOCIATIVE=16 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "thunderx" | |||
| #define CORENAME "THUNDERX" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_THUNDERX2T99 | |||
| #define ARMV8 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "THUNDERX2T99" | |||
| @@ -990,7 +1040,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||
| "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "thunderx2t99" | |||
| #define CORENAME "THUNDERX2T99" | |||
| #else | |||
| @@ -46,7 +46,7 @@ CAMAXKERNEL = zamax.S | |||
| ZAMAXKERNEL = zamax.S | |||
| SAXPYKERNEL = axpy.S | |||
| DAXPYKERNEL = daxpy_thunderx2t99.S | |||
| DAXPYKERNEL = axpy.S | |||
| CAXPYKERNEL = zaxpy.S | |||
| ZAXPYKERNEL = zaxpy.S | |||
| @@ -71,39 +71,37 @@ CGEMVTKERNEL = zgemv_t.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| SASUMKERNEL = sasum_thunderx2t99.c | |||
| DASUMKERNEL = dasum_thunderx2t99.c | |||
| CASUMKERNEL = casum_thunderx2t99.c | |||
| ZASUMKERNEL = zasum_thunderx2t99.c | |||
| SASUMKERNEL = asum.S | |||
| DASUMKERNEL = asum.S | |||
| CASUMKERNEL = casum.S | |||
| ZASUMKERNEL = zasum.S | |||
| SCOPYKERNEL = copy_thunderx2t99.c | |||
| DCOPYKERNEL = copy_thunderx2t99.c | |||
| CCOPYKERNEL = copy_thunderx2t99.c | |||
| ZCOPYKERNEL = copy_thunderx2t99.c | |||
| SCOPYKERNEL = copy.S | |||
| DCOPYKERNEL = copy.S | |||
| CCOPYKERNEL = copy.S | |||
| ZCOPYKERNEL = copy.S | |||
| SSWAPKERNEL = swap_thunderx2t99.S | |||
| DSWAPKERNEL = swap_thunderx2t99.S | |||
| CSWAPKERNEL = swap_thunderx2t99.S | |||
| ZSWAPKERNEL = swap_thunderx2t99.S | |||
| SSWAPKERNEL = swap.S | |||
| DSWAPKERNEL = swap.S | |||
| CSWAPKERNEL = swap.S | |||
| ZSWAPKERNEL = swap.S | |||
| ISAMAXKERNEL = iamax_thunderx2t99.c | |||
| IDAMAXKERNEL = iamax_thunderx2t99.c | |||
| ICAMAXKERNEL = izamax_thunderx2t99.c | |||
| IZAMAXKERNEL = izamax_thunderx2t99.c | |||
| ISAMAXKERNEL = iamax.S | |||
| IDAMAXKERNEL = iamax.S | |||
| ICAMAXKERNEL = izamax.S | |||
| IZAMAXKERNEL = izamax.S | |||
| ifneq ($(OS_DARWIN)$(CROSS),11) | |||
| SNRM2KERNEL = scnrm2_thunderx2t99.c | |||
| CNRM2KERNEL = scnrm2_thunderx2t99.c | |||
| #DNRM2KERNEL = dznrm2_thunderx2t99_fast.c | |||
| #ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c | |||
| DNRM2KERNEL = dznrm2_thunderx2t99.c | |||
| ZNRM2KERNEL = dznrm2_thunderx2t99.c | |||
| SNRM2KERNEL = nrm2.S | |||
| CNRM2KERNEL = nrm2.S | |||
| DNRM2KERNEL = znrm2.S | |||
| ZNRM2KERNEL = znrm2.S | |||
| endif | |||
| DDOTKERNEL = dot_thunderx2t99.c | |||
| SDOTKERNEL = dot_thunderx2t99.c | |||
| CDOTKERNEL = zdot_thunderx2t99.c | |||
| ZDOTKERNEL = zdot_thunderx2t99.c | |||
| DDOTKERNEL = dot.S | |||
| SDOTKERNEL = dot.S | |||
| CDOTKERNEL = zdot.S | |||
| ZDOTKERNEL = zdot.S | |||
| DSDOTKERNEL = dot.S | |||
| ifneq ($(OS_DARWIN)$(CROSS),11) | |||
| @@ -175,22 +173,6 @@ ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | |||
| DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | |||
| endif | |||
| ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 16x4) | |||
| SGEMMKERNEL = sgemm_kernel_16x4_thunderx2t99.S | |||
| endif | |||
| ifeq ($(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N), 8x4) | |||
| CGEMMKERNEL = cgemm_kernel_8x4_thunderx2t99.S | |||
| endif | |||
| ifeq ($(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N), 4x4) | |||
| ZGEMMKERNEL = zgemm_kernel_4x4_thunderx2t99.S | |||
| endif | |||
| else | |||
| STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| @@ -0,0 +1,3 @@ | |||
| include $(KERNELDIR)/KERNEL.ARMV8 | |||
| @@ -0,0 +1,3 @@ | |||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||
| @@ -0,0 +1,3 @@ | |||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||
| @@ -0,0 +1,3 @@ | |||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||
| @@ -1,3 +0,0 @@ | |||
| include $(KERNELDIR)/KERNEL.THUNDERX2T99 | |||
| @@ -2543,8 +2543,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SYMV_P 16 | |||
| #endif | |||
| // Common ARMv8 parameters | |||
| #if defined(ARMV8) | |||
| #if defined(CORTEXA57) | |||
| #define SNUMOPT 2 | |||
| #define DNUMOPT 2 | |||
| @@ -2552,46 +2553,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| #define DGEMM_DEFAULT_UNROLL_M 8 | |||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||
| #define CGEMM_DEFAULT_UNROLL_M 8 | |||
| #define CGEMM_DEFAULT_UNROLL_N 4 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 4 | |||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | |||
| #define SGEMM_DEFAULT_P 512 | |||
| #define DGEMM_DEFAULT_P 256 | |||
| #define CGEMM_DEFAULT_P 256 | |||
| #define ZGEMM_DEFAULT_P 128 | |||
| #define SGEMM_DEFAULT_Q 1024 | |||
| #define DGEMM_DEFAULT_Q 512 | |||
| #define CGEMM_DEFAULT_Q 512 | |||
| #define ZGEMM_DEFAULT_Q 512 | |||
| #define SGEMM_DEFAULT_R 4096 | |||
| #define DGEMM_DEFAULT_R 4096 | |||
| #define CGEMM_DEFAULT_R 4096 | |||
| #define ZGEMM_DEFAULT_R 2048 | |||
| #define SYMV_P 16 | |||
| #endif | |||
| #if defined(ARMV8) | |||
| // Darwin / Cross | |||
| #if defined(OS_DARWIN) && defined(CROSS) | |||
| #define SNUMOPT 2 | |||
| #define DNUMOPT 2 | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||
| #define SGEMM_DEFAULT_UNROLL_N 2 | |||
| @@ -2620,15 +2585,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CGEMM_DEFAULT_R 4096 | |||
| #define ZGEMM_DEFAULT_R 4096 | |||
| #define SYMV_P 16 | |||
| #else | |||
| #else // Linux / Native | |||
| #define SNUMOPT 2 | |||
| #define DNUMOPT 2 | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #if defined(CORTEXA53) || defined(CORTEXA57) || \ | |||
| defined(CORTEXA72) || defined(CORTEXA73) || \ | |||
| defined(FALKOR) | |||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| @@ -2642,33 +2603,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define ZGEMM_DEFAULT_UNROLL_M 4 | |||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | |||
| #define SGEMM_DEFAULT_P 128 | |||
| #define DGEMM_DEFAULT_P 160 | |||
| #define CGEMM_DEFAULT_P 128 | |||
| #define SGEMM_DEFAULT_P 512 | |||
| #define DGEMM_DEFAULT_P 256 | |||
| #define CGEMM_DEFAULT_P 256 | |||
| #define ZGEMM_DEFAULT_P 128 | |||
| #define SGEMM_DEFAULT_Q 352 | |||
| #define DGEMM_DEFAULT_Q 128 | |||
| #define CGEMM_DEFAULT_Q 224 | |||
| #define ZGEMM_DEFAULT_Q 112 | |||
| #define SGEMM_DEFAULT_Q 1024 | |||
| #define DGEMM_DEFAULT_Q 512 | |||
| #define CGEMM_DEFAULT_Q 512 | |||
| #define ZGEMM_DEFAULT_Q 512 | |||
| #define SGEMM_DEFAULT_R 4096 | |||
| #define DGEMM_DEFAULT_R 4096 | |||
| #define CGEMM_DEFAULT_R 4096 | |||
| #define ZGEMM_DEFAULT_R 4096 | |||
| #define SYMV_P 16 | |||
| #endif | |||
| #endif | |||
| #if defined(THUNDERX) | |||
| #define SNUMOPT 2 | |||
| #define DNUMOPT 2 | |||
| #define ZGEMM_DEFAULT_R 2048 | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #elif defined(THUNDERX) | |||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| @@ -2697,17 +2647,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CGEMM_DEFAULT_R 4096 | |||
| #define ZGEMM_DEFAULT_R 4096 | |||
| #elif defined(THUNDERX2T99) | |||
| #define SYMV_P 16 | |||
| #endif | |||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| #if defined(THUNDERX2T99) || defined(VULCAN) | |||
| #define SNUMOPT 2 | |||
| #define DNUMOPT 2 | |||
| #define DGEMM_DEFAULT_UNROLL_M 8 | |||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #define CGEMM_DEFAULT_UNROLL_M 8 | |||
| #define CGEMM_DEFAULT_UNROLL_N 4 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 4 | |||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | |||
| #define SGEMM_DEFAULT_P 128 | |||
| #define DGEMM_DEFAULT_P 160 | |||
| #define CGEMM_DEFAULT_P 128 | |||
| #define ZGEMM_DEFAULT_P 128 | |||
| #define SGEMM_DEFAULT_Q 352 | |||
| #define DGEMM_DEFAULT_Q 128 | |||
| #define CGEMM_DEFAULT_Q 224 | |||
| #define ZGEMM_DEFAULT_Q 112 | |||
| #define SGEMM_DEFAULT_R 4096 | |||
| #define DGEMM_DEFAULT_R 4096 | |||
| #define CGEMM_DEFAULT_R 4096 | |||
| #define ZGEMM_DEFAULT_R 4096 | |||
| #else // Other/undetected ARMv8 cores | |||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| @@ -2736,8 +2705,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CGEMM_DEFAULT_R 4096 | |||
| #define ZGEMM_DEFAULT_R 4096 | |||
| #define SYMV_P 16 | |||
| #endif | |||
| #endif // Cores | |||
| #endif // Linux / Darwin | |||
| #endif // ARMv8 | |||
| #if defined(ARMV5) | |||
| #define SNUMOPT 2 | |||