Initial support for M1 on Linux, Phytium FT2000 series, ARMV9 Cortex X1, X2, A510, A710 (tags/v0.3.21)
| @@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(CORE), FT2000) | |||||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||||
| ifneq ($(F_COMPILER), NAG) | |||||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||||
| endif | |||||
| endif | |||||
| # Use a72 tunings because Neoverse-N1 is only available | # Use a72 tunings because Neoverse-N1 is only available | ||||
| # in GCC>=9 | # in GCC>=9 | ||||
| ifeq ($(CORE), NEOVERSEN1) | ifeq ($(CORE), NEOVERSEN1) | ||||
| @@ -229,6 +236,43 @@ endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| # Cortex-X1: only add the flags when the compiler is GCC >= 11 or clang. | |||||
| # Use the toolchain spellings "armv9-a" and "cortex-x1"; the unhyphenated | |||||
| # forms "armv9" / "cortexx1" are rejected by the compiler. | |||||
| # NOTE(review): -march=armv9-a appears to need GCC 12 - confirm the version gate. | |||||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||||
| ifeq ($(CORE), CORTEXX1) | |||||
| CCOMMON_OPT += -march=armv9-a -mtune=cortex-x1 | |||||
| ifneq ($(F_COMPILER), NAG) | |||||
| FCOMMON_OPT += -march=armv9-a -mtune=cortex-x1 | |||||
| endif | |||||
| endif | |||||
| endif | |||||
| # Cortex-X2: only add the flags when the compiler is GCC >= 11 or clang. | |||||
| # Use the toolchain spellings "armv9-a" and "cortex-x2"; the unhyphenated | |||||
| # forms "armv9" / "cortexx2" are rejected by the compiler. | |||||
| # NOTE(review): armv9-a and cortex-x2 appear to need GCC 12 - confirm the version gate. | |||||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||||
| ifeq ($(CORE), CORTEXX2) | |||||
| CCOMMON_OPT += -march=armv9-a -mtune=cortex-x2 | |||||
| ifneq ($(F_COMPILER), NAG) | |||||
| FCOMMON_OPT += -march=armv9-a -mtune=cortex-x2 | |||||
| endif | |||||
| endif | |||||
| endif | |||||
| #ifeq (1, $(filter 1,$(ISCLANG))) | |||||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||||
| ifeq ($(CORE), CORTEXA510) | |||||
| CCOMMON_OPT += -march=armv8.4-a+sve | |||||
| ifneq ($(F_COMPILER), NAG) | |||||
| FCOMMON_OPT += -march=armv8.4-a+sve | |||||
| endif | |||||
| endif | |||||
| endif | |||||
| # Cortex-A710: only add the flags when the compiler is GCC >= 11 or clang. | |||||
| # The tuning target is spelled "cortex-a710"; "cortexa710" is rejected by the compiler. | |||||
| # NOTE(review): cortex-a710 tuning appears to need GCC 12 - confirm the version gate. | |||||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||||
| ifeq ($(CORE), CORTEXA710) | |||||
| CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a710 | |||||
| ifneq ($(F_COMPILER), NAG) | |||||
| FCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a710 | |||||
| endif | |||||
| endif | |||||
| endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -92,6 +92,10 @@ CORTEXA53 | |||||
| CORTEXA57 | CORTEXA57 | ||||
| CORTEXA72 | CORTEXA72 | ||||
| CORTEXA73 | CORTEXA73 | ||||
| CORTEXA510 | |||||
| CORTEXA710 | |||||
| CORTEXX1 | |||||
| CORTEXX2 | |||||
| NEOVERSEN1 | NEOVERSEN1 | ||||
| NEOVERSEV1 | NEOVERSEV1 | ||||
| NEOVERSEN2 | NEOVERSEN2 | ||||
| @@ -103,6 +107,9 @@ THUNDERX2T99 | |||||
| TSV110 | TSV110 | ||||
| THUNDERX3T110 | THUNDERX3T110 | ||||
| VORTEX | VORTEX | ||||
| A64FX | |||||
| ARMV8SVE | |||||
| FT2000 | |||||
| 9.System Z: | 9.System Z: | ||||
| ZARCH_GENERIC | ZARCH_GENERIC | ||||
| @@ -316,6 +316,7 @@ if ($architecture ne $hostarch) { | |||||
| } | } | ||||
| $cross = 1 if ($os ne $hostos); | $cross = 1 if ($os ne $hostos); | ||||
| $cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && ($ENV{TERMUX_APP_PID} != "")); | |||||
| $openmp = "" if $ENV{USE_OPENMP} != 1; | $openmp = "" if $ENV{USE_OPENMP} != 1; | ||||
| @@ -161,6 +161,30 @@ if (${CORE} STREQUAL ARMV8SVE) | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| if (${CORE} STREQUAL CORTEXA510) | |||||
| if (NOT DYNAMIC_ARCH) | |||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") | |||||
| endif () | |||||
| endif () | |||||
| if (${CORE} STREQUAL CORTEXA710) | |||||
| if (NOT DYNAMIC_ARCH) | |||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") | |||||
| endif () | |||||
| endif () | |||||
| if (${CORE} STREQUAL CORTEXX1) | |||||
| if (NOT DYNAMIC_ARCH) | |||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") | |||||
| endif () | |||||
| endif () | |||||
| if (${CORE} STREQUAL CORTEXX2) | |||||
| if (NOT DYNAMIC_ARCH) | |||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") | |||||
| endif () | |||||
| endif () | |||||
| if (${CORE} STREQUAL POWER10) | if (${CORE} STREQUAL POWER10) | ||||
| if (NOT DYNAMIC_ARCH) | if (NOT DYNAMIC_ARCH) | ||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | ||||
| @@ -45,6 +45,10 @@ size_t length64=sizeof(value64); | |||||
| #define CPU_NEOVERSEN1 11 | #define CPU_NEOVERSEN1 11 | ||||
| #define CPU_NEOVERSEV1 16 | #define CPU_NEOVERSEV1 16 | ||||
| #define CPU_NEOVERSEN2 17 | #define CPU_NEOVERSEN2 17 | ||||
| #define CPU_CORTEXX1 18 | |||||
| #define CPU_CORTEXX2 19 | |||||
| #define CPU_CORTEXA510 20 | |||||
| #define CPU_CORTEXA710 21 | |||||
| // Qualcomm | // Qualcomm | ||||
| #define CPU_FALKOR 6 | #define CPU_FALKOR 6 | ||||
| // Cavium | // Cavium | ||||
| @@ -59,6 +63,8 @@ size_t length64=sizeof(value64); | |||||
| #define CPU_VORTEX 13 | #define CPU_VORTEX 13 | ||||
| // Fujitsu | // Fujitsu | ||||
| #define CPU_A64FX 15 | #define CPU_A64FX 15 | ||||
| // Phytium | |||||
| #define CPU_FT2000 22 | |||||
| static char *cpuname[] = { | static char *cpuname[] = { | ||||
| "UNKNOWN", | "UNKNOWN", | ||||
| @@ -73,12 +79,17 @@ static char *cpuname[] = { | |||||
| "TSV110", | "TSV110", | ||||
| "EMAG8180", | "EMAG8180", | ||||
| "NEOVERSEN1", | "NEOVERSEN1", | ||||
| "NEOVERSEV1" | |||||
| "NEOVERSEN2" | |||||
| "THUNDERX3T110", | "THUNDERX3T110", | ||||
| "VORTEX", | "VORTEX", | ||||
| "CORTEXA55", | "CORTEXA55", | ||||
| "A64FX" | |||||
| "A64FX", | |||||
| "NEOVERSEV1", | |||||
| "NEOVERSEN2", | |||||
| "CORTEXX1", | |||||
| "CORTEXX2", | |||||
| "CORTEXA510", | |||||
| "CORTEXA710", | |||||
| "FT2000" | |||||
| }; | }; | ||||
| static char *cpuname_lower[] = { | static char *cpuname_lower[] = { | ||||
| @@ -94,12 +105,17 @@ static char *cpuname_lower[] = { | |||||
| "tsv110", | "tsv110", | ||||
| "emag8180", | "emag8180", | ||||
| "neoversen1", | "neoversen1", | ||||
| "neoversev1", | |||||
| "neoversen2", | |||||
| "thunderx3t110", | "thunderx3t110", | ||||
| "vortex", | "vortex", | ||||
| "cortexa55", | "cortexa55", | ||||
| "a64fx" | |||||
| "a64fx", | |||||
| "neoversev1", | |||||
| "neoversen2", | |||||
| "cortexx1", | |||||
| "cortexx2", | |||||
| "cortexa510", | |||||
| "cortexa710", | |||||
| "ft2000" | |||||
| }; | }; | ||||
| int get_feature(char *search) | int get_feature(char *search) | ||||
| @@ -182,6 +198,14 @@ int detect(void) | |||||
| return CPU_NEOVERSEN2; | return CPU_NEOVERSEN2; | ||||
| else if (strstr(cpu_part, "0xd05")) | else if (strstr(cpu_part, "0xd05")) | ||||
| return CPU_CORTEXA55; | return CPU_CORTEXA55; | ||||
| else if (strstr(cpu_part, "0xd46")) | |||||
| return CPU_CORTEXA510; | |||||
| else if (strstr(cpu_part, "0xd47")) | |||||
| return CPU_CORTEXA710; | |||||
| else if (strstr(cpu_part, "0xd44")) | |||||
| return CPU_CORTEXX1; | |||||
| else if (strstr(cpu_part, "0xd4c")) | |||||
| return CPU_CORTEXX2; | |||||
| } | } | ||||
| // Qualcomm | // Qualcomm | ||||
| else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | ||||
| @@ -202,6 +226,13 @@ int detect(void) | |||||
| // Fujitsu | // Fujitsu | ||||
| else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) | else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) | ||||
| return CPU_A64FX; | return CPU_A64FX; | ||||
| // Apple | |||||
| else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022")) | |||||
| return CPU_VORTEX; | |||||
| // Phytium | |||||
| else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661") | |||||
| || strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663"))) | |||||
| return CPU_FT2000; | |||||
| } | } | ||||
| p = (char *) NULL ; | p = (char *) NULL ; | ||||
| @@ -382,7 +413,24 @@ void get_cpuconfig(void) | |||||
| printf("#define DTB_DEFAULT_ENTRIES 48\n"); | printf("#define DTB_DEFAULT_ENTRIES 48\n"); | ||||
| printf("#define DTB_SIZE 4096\n"); | printf("#define DTB_SIZE 4096\n"); | ||||
| break; | break; | ||||
| case CPU_CORTEXA510: | |||||
| case CPU_CORTEXA710: | |||||
| case CPU_CORTEXX1: | |||||
| case CPU_CORTEXX2: | |||||
| printf("#define ARMV9\n"); | |||||
| printf("#define %s\n", cpuname[d]); | |||||
| printf("#define L1_CODE_SIZE 65536\n"); | |||||
| printf("#define L1_CODE_LINESIZE 64\n"); | |||||
| printf("#define L1_CODE_ASSOCIATIVE 4\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L1_DATA_ASSOCIATIVE 4\n"); | |||||
| printf("#define L2_SIZE 1048576\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define L2_ASSOCIATIVE 8\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| break; | |||||
| case CPU_FALKOR: | case CPU_FALKOR: | ||||
| printf("#define FALKOR\n"); | printf("#define FALKOR\n"); | ||||
| printf("#define L1_CODE_SIZE 65536\n"); | printf("#define L1_CODE_SIZE 65536\n"); | ||||
| @@ -469,9 +517,9 @@ void get_cpuconfig(void) | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | ||||
| printf("#define DTB_SIZE 4096 \n"); | printf("#define DTB_SIZE 4096 \n"); | ||||
| break; | break; | ||||
| #ifdef __APPLE__ | |||||
| case CPU_VORTEX: | case CPU_VORTEX: | ||||
| printf("#define VORTEX \n"); | printf("#define VORTEX \n"); | ||||
| #ifdef __APPLE__ | |||||
| sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); | sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); | ||||
| printf("#define L1_CODE_SIZE %lld \n",value64); | printf("#define L1_CODE_SIZE %lld \n",value64); | ||||
| sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); | sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); | ||||
| @@ -480,10 +528,10 @@ void get_cpuconfig(void) | |||||
| printf("#define L1_DATA_SIZE %lld \n",value64); | printf("#define L1_DATA_SIZE %lld \n",value64); | ||||
| sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); | sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); | ||||
| printf("#define L2_SIZE %lld \n",value64); | printf("#define L2_SIZE %lld \n",value64); | ||||
| #endif | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | ||||
| printf("#define DTB_SIZE 4096 \n"); | printf("#define DTB_SIZE 4096 \n"); | ||||
| break; | break; | ||||
| #endif | |||||
| case CPU_A64FX: | case CPU_A64FX: | ||||
| printf("#define A64FX\n"); | printf("#define A64FX\n"); | ||||
| printf("#define L1_CODE_SIZE 65535\n"); | printf("#define L1_CODE_SIZE 65535\n"); | ||||
| @@ -494,6 +542,16 @@ void get_cpuconfig(void) | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | printf("#define DTB_DEFAULT_ENTRIES 64\n"); | ||||
| printf("#define DTB_SIZE 4096\n"); | printf("#define DTB_SIZE 4096\n"); | ||||
| break; | break; | ||||
| case CPU_FT2000: | |||||
| printf("#define FT2000\n"); | |||||
| printf("#define L1_CODE_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L2_SIZE 33554432\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| break; | |||||
| } | } | ||||
| get_cpucount(); | get_cpucount(); | ||||
| } | } | ||||
| @@ -1232,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "cortexa53" | #define LIBNAME "cortexa53" | ||||
| #define CORENAME "CORTEXA53" | #define CORENAME "CORTEXA53" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA57 | #ifdef FORCE_CORTEXA57 | ||||
| @@ -1248,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "cortexa57" | #define LIBNAME "cortexa57" | ||||
| #define CORENAME "CORTEXA57" | #define CORENAME "CORTEXA57" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA72 | #ifdef FORCE_CORTEXA72 | ||||
| @@ -1264,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "cortexa72" | #define LIBNAME "cortexa72" | ||||
| #define CORENAME "CORTEXA72" | #define CORENAME "CORTEXA72" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA73 | #ifdef FORCE_CORTEXA73 | ||||
| @@ -1280,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "cortexa73" | #define LIBNAME "cortexa73" | ||||
| #define CORENAME "CORTEXA73" | #define CORENAME "CORTEXA73" | ||||
| #else | |||||
| #endif | |||||
| #ifdef FORCE_CORTEXX1 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "CORTEXX1" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXX1 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||||
| #define LIBNAME "cortexx1" | |||||
| #define CORENAME "CORTEXX1" | |||||
| #endif | |||||
| #ifdef FORCE_CORTEXX2 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "CORTEXX2" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXX2 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||||
| #define LIBNAME "cortexx2" | |||||
| #define CORENAME "CORTEXX2" | |||||
| #endif | |||||
| #ifdef FORCE_CORTEXA510 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "CORTEXA510" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXA510 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||||
| #define LIBNAME "cortexa510" | |||||
| #define CORENAME "CORTEXA510" | |||||
| #endif | |||||
| #ifdef FORCE_CORTEXA710 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "CORTEXA710" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXA710 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||||
| #define LIBNAME "cortexa710" | |||||
| #define CORENAME "CORTEXA710" | |||||
| #endif | #endif | ||||
| #ifdef FORCE_NEOVERSEN1 | #ifdef FORCE_NEOVERSEN1 | ||||
| @@ -1297,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-march=armv8.2-a -mtune=neoverse-n1" | "-march=armv8.2-a -mtune=neoverse-n1" | ||||
| #define LIBNAME "neoversen1" | #define LIBNAME "neoversen1" | ||||
| #define CORENAME "NEOVERSEN1" | #define CORENAME "NEOVERSEN1" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_NEOVERSEV1 | #ifdef FORCE_NEOVERSEV1 | ||||
| @@ -1314,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-march=armv8.4-a -mtune=neoverse-v1" | "-march=armv8.4-a -mtune=neoverse-v1" | ||||
| #define LIBNAME "neoversev1" | #define LIBNAME "neoversev1" | ||||
| #define CORENAME "NEOVERSEV1" | #define CORENAME "NEOVERSEV1" | ||||
| #else | |||||
| #endif | #endif | ||||
| @@ -1332,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-march=armv8.5-a -mtune=neoverse-n2" | "-march=armv8.5-a -mtune=neoverse-n2" | ||||
| #define LIBNAME "neoversen2" | #define LIBNAME "neoversen2" | ||||
| #define CORENAME "NEOVERSEN2" | #define CORENAME "NEOVERSEN2" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA55 | #ifdef FORCE_CORTEXA55 | ||||
| @@ -1348,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "cortexa55" | #define LIBNAME "cortexa55" | ||||
| #define CORENAME "CORTEXA55" | #define CORENAME "CORTEXA55" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_FALKOR | #ifdef FORCE_FALKOR | ||||
| @@ -1364,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "falkor" | #define LIBNAME "falkor" | ||||
| #define CORENAME "FALKOR" | #define CORENAME "FALKOR" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_THUNDERX | #ifdef FORCE_THUNDERX | ||||
| @@ -1379,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "thunderx" | #define LIBNAME "thunderx" | ||||
| #define CORENAME "THUNDERX" | #define CORENAME "THUNDERX" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_THUNDERX2T99 | #ifdef FORCE_THUNDERX2T99 | ||||
| @@ -1397,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "thunderx2t99" | #define LIBNAME "thunderx2t99" | ||||
| #define CORENAME "THUNDERX2T99" | #define CORENAME "THUNDERX2T99" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_TSV110 | #ifdef FORCE_TSV110 | ||||
| @@ -1413,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "tsv110" | #define LIBNAME "tsv110" | ||||
| #define CORENAME "TSV110" | #define CORENAME "TSV110" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_EMAG8180 | #ifdef FORCE_EMAG8180 | ||||
| @@ -1448,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "thunderx3t110" | #define LIBNAME "thunderx3t110" | ||||
| #define CORENAME "THUNDERX3T110" | #define CORENAME "THUNDERX3T110" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_VORTEX | #ifdef FORCE_VORTEX | ||||
| @@ -1480,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8" | ||||
| #define LIBNAME "a64fx" | #define LIBNAME "a64fx" | ||||
| #define CORENAME "A64FX" | #define CORENAME "A64FX" | ||||
| #else | |||||
| #endif | |||||
| /* Forced-target description for Phytium FT2000: architecture names plus the | |||||
| * cache/TLB parameters passed on as -D options through ARCHCONFIG. | |||||
| * Each -D option must be separated by a space, and L2_SIZE is 32 MiB | |||||
| * (33554432 bytes). */ | |||||
| #ifdef FORCE_FT2000 | |||||
| #define ARMV8 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "FT2000" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DFT2000 " \ | |||||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ | |||||
| "-DL2_SIZE=33554432 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "ft2000" | |||||
| #define CORENAME "FT2000" | |||||
| #endif | #endif | ||||
| #ifdef FORCE_ZARCH_GENERIC | #ifdef FORCE_ZARCH_GENERIC | ||||
| @@ -0,0 +1,216 @@ | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAXPYKERNEL = axpy.S | |||||
| DAXPYKERNEL = axpy.S | |||||
| CAXPYKERNEL = zaxpy.S | |||||
| ZAXPYKERNEL = zaxpy.S | |||||
| SROTKERNEL = rot.S | |||||
| DROTKERNEL = rot.S | |||||
| CROTKERNEL = zrot.S | |||||
| ZROTKERNEL = zrot.S | |||||
| SSCALKERNEL = scal.S | |||||
| DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | |||||
| ZSCALKERNEL = zscal.S | |||||
| SGEMVNKERNEL = gemv_n.S | |||||
| DGEMVNKERNEL = gemv_n.S | |||||
| CGEMVNKERNEL = zgemv_n.S | |||||
| ZGEMVNKERNEL = zgemv_n.S | |||||
| SGEMVTKERNEL = gemv_t.S | |||||
| DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | |||||
| ZGEMVTKERNEL = zgemv_t.S | |||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| DDOTKERNEL = dot.S | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| SDOTKERNEL = ../generic/dot.c | |||||
| else | |||||
| SDOTKERNEL = dot.S | |||||
| endif | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| else | |||||
| CDOTKERNEL = ../arm/zdot.c | |||||
| ZDOTKERNEL = ../arm/zdot.c | |||||
| endif | |||||
| DSDOTKERNEL = dot.S | |||||
| DGEMM_BETA = dgemm_beta.S | |||||
| SGEMM_BETA = sgemm_beta.S | |||||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| @@ -0,0 +1,216 @@ | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAXPYKERNEL = axpy.S | |||||
| DAXPYKERNEL = axpy.S | |||||
| CAXPYKERNEL = zaxpy.S | |||||
| ZAXPYKERNEL = zaxpy.S | |||||
| SROTKERNEL = rot.S | |||||
| DROTKERNEL = rot.S | |||||
| CROTKERNEL = zrot.S | |||||
| ZROTKERNEL = zrot.S | |||||
| SSCALKERNEL = scal.S | |||||
| DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | |||||
| ZSCALKERNEL = zscal.S | |||||
| SGEMVNKERNEL = gemv_n.S | |||||
| DGEMVNKERNEL = gemv_n.S | |||||
| CGEMVNKERNEL = zgemv_n.S | |||||
| ZGEMVNKERNEL = zgemv_n.S | |||||
| SGEMVTKERNEL = gemv_t.S | |||||
| DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | |||||
| ZGEMVTKERNEL = zgemv_t.S | |||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| DDOTKERNEL = dot.S | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| SDOTKERNEL = ../generic/dot.c | |||||
| else | |||||
| SDOTKERNEL = dot.S | |||||
| endif | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| else | |||||
| CDOTKERNEL = ../arm/zdot.c | |||||
| ZDOTKERNEL = ../arm/zdot.c | |||||
| endif | |||||
| DSDOTKERNEL = dot.S | |||||
| DGEMM_BETA = dgemm_beta.S | |||||
| SGEMM_BETA = sgemm_beta.S | |||||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| @@ -0,0 +1,216 @@ | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAXPYKERNEL = axpy.S | |||||
| DAXPYKERNEL = axpy.S | |||||
| CAXPYKERNEL = zaxpy.S | |||||
| ZAXPYKERNEL = zaxpy.S | |||||
| SROTKERNEL = rot.S | |||||
| DROTKERNEL = rot.S | |||||
| CROTKERNEL = zrot.S | |||||
| ZROTKERNEL = zrot.S | |||||
| SSCALKERNEL = scal.S | |||||
| DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | |||||
| ZSCALKERNEL = zscal.S | |||||
| SGEMVNKERNEL = gemv_n.S | |||||
| DGEMVNKERNEL = gemv_n.S | |||||
| CGEMVNKERNEL = zgemv_n.S | |||||
| ZGEMVNKERNEL = zgemv_n.S | |||||
| SGEMVTKERNEL = gemv_t.S | |||||
| DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | |||||
| ZGEMVTKERNEL = zgemv_t.S | |||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| DDOTKERNEL = dot.S | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| SDOTKERNEL = ../generic/dot.c | |||||
| else | |||||
| SDOTKERNEL = dot.S | |||||
| endif | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| else | |||||
| CDOTKERNEL = ../arm/zdot.c | |||||
| ZDOTKERNEL = ../arm/zdot.c | |||||
| endif | |||||
| DSDOTKERNEL = dot.S | |||||
| DGEMM_BETA = dgemm_beta.S | |||||
| SGEMM_BETA = sgemm_beta.S | |||||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| @@ -0,0 +1,216 @@ | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAXPYKERNEL = axpy.S | |||||
| DAXPYKERNEL = axpy.S | |||||
| CAXPYKERNEL = zaxpy.S | |||||
| ZAXPYKERNEL = zaxpy.S | |||||
| SROTKERNEL = rot.S | |||||
| DROTKERNEL = rot.S | |||||
| CROTKERNEL = zrot.S | |||||
| ZROTKERNEL = zrot.S | |||||
| SSCALKERNEL = scal.S | |||||
| DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | |||||
| ZSCALKERNEL = zscal.S | |||||
| SGEMVNKERNEL = gemv_n.S | |||||
| DGEMVNKERNEL = gemv_n.S | |||||
| CGEMVNKERNEL = zgemv_n.S | |||||
| ZGEMVNKERNEL = zgemv_n.S | |||||
| SGEMVTKERNEL = gemv_t.S | |||||
| DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | |||||
| ZGEMVTKERNEL = zgemv_t.S | |||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| DDOTKERNEL = dot.S | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| SDOTKERNEL = ../generic/dot.c | |||||
| else | |||||
| SDOTKERNEL = dot.S | |||||
| endif | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| else | |||||
| CDOTKERNEL = ../arm/zdot.c | |||||
| ZDOTKERNEL = ../arm/zdot.c | |||||
| endif | |||||
| DSDOTKERNEL = dot.S | |||||
| DGEMM_BETA = dgemm_beta.S | |||||
| SGEMM_BETA = sgemm_beta.S | |||||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| @@ -0,0 +1,3 @@ | |||||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||||
| @@ -3130,7 +3130,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(CORTEXA57) || \ | #if defined(CORTEXA57) || \ | ||||
| defined(CORTEXA72) || defined(CORTEXA73) || \ | defined(CORTEXA72) || defined(CORTEXA73) || \ | ||||
| defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) | |||||
| defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000) | |||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | #define SGEMM_DEFAULT_UNROLL_M 16 | ||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | #define SGEMM_DEFAULT_UNROLL_N 4 | ||||
| @@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d | |||||
| #define CGEMM_DEFAULT_R 4096 | #define CGEMM_DEFAULT_R 4096 | ||||
| #define ZGEMM_DEFAULT_R 4096 | #define ZGEMM_DEFAULT_R 4096 | ||||
| #elif defined(ARMV8SVE) || defined(A64FX) | |||||
| #elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510) | |||||
| /* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". | /* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". | ||||
| Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */ | Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */ | ||||