| @@ -23,9 +23,9 @@ if(MSVC AND NOT DEFINED NOFORTRAN) | |||
| endif() | |||
| ####### | |||
| if(MSVC) | |||
| option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) | |||
| endif() | |||
| option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF) | |||
| option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON) | |||
| option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF) | |||
| @@ -320,7 +320,9 @@ if (NOT NOFORTRAN) | |||
| if(NOT NO_CBLAS) | |||
| add_subdirectory(ctest) | |||
| endif() | |||
| add_subdirectory(lapack-netlib/TESTING) | |||
| if (BUILD_TESTING) | |||
| add_subdirectory(lapack-netlib/TESTING) | |||
| endif() | |||
| if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV) | |||
| add_subdirectory(cpp_thread_test) | |||
| endif() | |||
| @@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||
| endif | |||
| endif | |||
| # FT2000: build for the armv8-a baseline with Cortex-A72 tuning. | |||
| ifeq ($(CORE), FT2000) | |||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
| # The Fortran flags are skipped when F_COMPILER is NAG. | |||
| ifneq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
| endif | |||
| endif | |||
| # Use a72 tunings because Neoverse-N1 is only available | |||
| # in GCC>=9 | |||
| ifeq ($(CORE), NEOVERSEN1) | |||
| @@ -229,6 +236,43 @@ endif | |||
| endif | |||
| endif | |||
| # Cortex-X1 flags, applied only when GCCVERSIONGTEQ11 or ISCLANG evaluates to 1. | |||
| # -mtune takes the hyphenated CPU name ("cortex-a72"), the spelling the FT2000 | |||
| # rule already uses; "cortexa72" is rejected by the compiler as an unknown CPU. | |||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
| ifeq ($(CORE), CORTEXX1) | |||
| CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72 | |||
| # The Fortran flags are skipped when F_COMPILER is NAG. | |||
| ifneq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72 | |||
| endif | |||
| endif | |||
| endif | |||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
| ifeq ($(CORE), CORTEXX2) | |||
| CCOMMON_OPT += -march=armv8.4-a+sve | |||
| ifneq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -march=armv8.4-a+sve | |||
| endif | |||
| endif | |||
| endif | |||
| #ifeq (1, $(filter 1,$(ISCLANG))) | |||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
| ifeq ($(CORE), CORTEXA510) | |||
| CCOMMON_OPT += -march=armv8.4-a+sve | |||
| ifneq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -march=armv8.4-a+sve | |||
| endif | |||
| endif | |||
| endif | |||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
| ifeq ($(CORE), CORTEXA710) | |||
| CCOMMON_OPT += -march=armv8.4-a+sve | |||
| ifneq ($(F_COMPILER), NAG) | |||
| FCOMMON_OPT += -march=armv8.4-a+sve | |||
| endif | |||
| endif | |||
| endif | |||
| endif | |||
| endif | |||
| @@ -71,7 +71,8 @@ endif | |||
| getarch : getarch.c cpuid.S dummy $(CPUIDEMU) | |||
| $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU) | |||
| avx512=$$(perl c_check - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \ | |||
| $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU) | |||
| getarch_2nd : getarch_2nd.c config.h dummy | |||
| ifndef TARGET_CORE | |||
| @@ -92,6 +92,10 @@ CORTEXA53 | |||
| CORTEXA57 | |||
| CORTEXA72 | |||
| CORTEXA73 | |||
| CORTEXA510 | |||
| CORTEXA710 | |||
| CORTEXX1 | |||
| CORTEXX2 | |||
| NEOVERSEN1 | |||
| NEOVERSEV1 | |||
| NEOVERSEN2 | |||
| @@ -103,6 +107,9 @@ THUNDERX2T99 | |||
| TSV110 | |||
| THUNDERX3T110 | |||
| VORTEX | |||
| A64FX | |||
| ARMV8SVE | |||
| FT2000 | |||
| 9.System Z: | |||
| ZARCH_GENERIC | |||
| @@ -65,7 +65,7 @@ jobs: | |||
| - task: CMake@1 | |||
| inputs: | |||
| workingDirectory: 'build' # Optional | |||
| cmakeArgs: '-G "Visual Studio 16 2019" ..' | |||
| cmakeArgs: '-G "Visual Studio 17 2022" ..' | |||
| - task: CMake@1 | |||
| inputs: | |||
| cmakeArgs: '--build . --config Release' | |||
| @@ -103,7 +103,7 @@ jobs: | |||
| - job: Windows_flang_clang | |||
| pool: | |||
| vmImage: 'windows-latest' | |||
| vmImage: 'windows-2022' | |||
| steps: | |||
| - script: | | |||
| set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%" | |||
| @@ -114,8 +114,8 @@ jobs: | |||
| conda install --yes --quiet ninja flang | |||
| mkdir build | |||
| cd build | |||
| call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" | |||
| cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON .. | |||
| call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" | |||
| cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON .. | |||
| cmake --build . --config Release | |||
| ctest | |||
| @@ -178,7 +178,7 @@ jobs: | |||
| cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON .. | |||
| cmake --build . | |||
| ctest | |||
| - job: OSX_Ifort_Clang | |||
| pool: | |||
| vmImage: 'macOS-10.15' | |||
| @@ -254,7 +254,7 @@ if (($architecture eq "x86") || ($architecture eq "x86_64")) { | |||
| # $tmpf = new File::Temp( UNLINK => 1 ); | |||
| ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | |||
| $code = '"vbroadcastss -4 * 4(%rsi), %zmm2"'; | |||
| print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n"; | |||
| print $fh "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n"; | |||
| $args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf"; | |||
| if ($compiler eq "PGI") { | |||
| $args = " -tp skylake -c -o $tmpf.o $tmpf"; | |||
| @@ -278,7 +278,7 @@ if ($data =~ /HAVE_C11/) { | |||
| $c11_atomics = 0; | |||
| } else { | |||
| ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | |||
| print $tmpf "#include <stdatomic.h>\nint main(void){}\n"; | |||
| print $fh "#include <stdatomic.h>\nint main(void){}\n"; | |||
| $args = " -c -o $tmpf.o $tmpf"; | |||
| my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); | |||
| system(@cmd) == 0; | |||
| @@ -316,6 +316,7 @@ if ($architecture ne $hostarch) { | |||
| } | |||
| # A target OS that differs from the host OS marks the build as a cross build ... | |||
| $cross = 1 if ($os ne $hostos); | |||
| # ... unless the target is Android, the host is Linux and TERMUX_APP_PID is set to a | |||
| # non-empty value. The variable is tested as a string: the numeric "!=" would numify | |||
| # both operands (the empty string and an unset variable both become 0). | |||
| $cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && defined($ENV{TERMUX_APP_PID}) && ($ENV{TERMUX_APP_PID} ne "")); | |||
| # Clear $openmp unless USE_OPENMP is numerically 1 in the environment. | |||
| $openmp = "" if $ENV{USE_OPENMP} != 1; | |||
| @@ -161,6 +161,30 @@ if (${CORE} STREQUAL ARMV8SVE) | |||
| endif () | |||
| endif () | |||
| # Cortex-A510 / Cortex-A710 / Cortex-X1 / Cortex-X2: append the -march baseline to | |||
| # CCOMMON_OPT, but only when DYNAMIC_ARCH is not set. | |||
| # Both STREQUAL operands are quoted so that an empty or undefined CORE compares as | |||
| # the empty string instead of collapsing into a malformed if() expression. | |||
| if ("${CORE}" STREQUAL "CORTEXA510") | |||
| if (NOT DYNAMIC_ARCH) | |||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") | |||
| endif () | |||
| endif () | |||
| if ("${CORE}" STREQUAL "CORTEXA710") | |||
| if (NOT DYNAMIC_ARCH) | |||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") | |||
| endif () | |||
| endif () | |||
| # Cortex-X1 is the only one of the four built without +sve. | |||
| if ("${CORE}" STREQUAL "CORTEXX1") | |||
| if (NOT DYNAMIC_ARCH) | |||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a") | |||
| endif () | |||
| endif () | |||
| if ("${CORE}" STREQUAL "CORTEXX2") | |||
| if (NOT DYNAMIC_ARCH) | |||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") | |||
| endif () | |||
| endif () | |||
| if (${CORE} STREQUAL POWER10) | |||
| if (NOT DYNAMIC_ARCH) | |||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||
| @@ -67,7 +67,15 @@ if (${F_COMPILER} STREQUAL "GFORTRAN") | |||
| if (BINARY64) | |||
| set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | |||
| if (INTERFACE64) | |||
| set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") | |||
| if (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") | |||
| if (WIN32) | |||
| set(FCOMMON_OPT "${FCOMMON_OPT} /integer-size:64") | |||
| else () | |||
| set(FCOMMON_OPT "${FCOMMON_OPT} -integer-size 64") | |||
| endif () | |||
| else () | |||
| set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") | |||
| endif () | |||
| endif () | |||
| else () | |||
| set(FCOMMON_OPT "${FCOMMON_OPT} -m32") | |||
| @@ -2610,8 +2610,9 @@ | |||
| #endif | |||
| #ifndef ASSEMBLER | |||
| #if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)\ | |||
| || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) | |||
| #if !defined(DYNAMIC_ARCH) \ | |||
| && (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \ | |||
| || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K)) | |||
| extern BLASLONG gemm_offset_a; | |||
| extern BLASLONG gemm_offset_b; | |||
| extern BLASLONG sbgemm_p; | |||
| @@ -45,6 +45,10 @@ size_t length64=sizeof(value64); | |||
| #define CPU_NEOVERSEN1 11 | |||
| #define CPU_NEOVERSEV1 16 | |||
| #define CPU_NEOVERSEN2 17 | |||
| #define CPU_CORTEXX1 18 | |||
| #define CPU_CORTEXX2 19 | |||
| #define CPU_CORTEXA510 20 | |||
| #define CPU_CORTEXA710 21 | |||
| // Qualcomm | |||
| #define CPU_FALKOR 6 | |||
| // Cavium | |||
| @@ -59,6 +63,8 @@ size_t length64=sizeof(value64); | |||
| #define CPU_VORTEX 13 | |||
| // Fujitsu | |||
| #define CPU_A64FX 15 | |||
| // Phytium | |||
| #define CPU_FT2000 22 | |||
| static char *cpuname[] = { | |||
| "UNKNOWN", | |||
| @@ -73,12 +79,17 @@ static char *cpuname[] = { | |||
| "TSV110", | |||
| "EMAG8180", | |||
| "NEOVERSEN1", | |||
| "NEOVERSEV1" | |||
| "NEOVERSEN2" | |||
| "THUNDERX3T110", | |||
| "VORTEX", | |||
| "CORTEXA55", | |||
| "A64FX" | |||
| "A64FX", | |||
| "NEOVERSEV1", | |||
| "NEOVERSEN2", | |||
| "CORTEXX1", | |||
| "CORTEXX2", | |||
| "CORTEXA510", | |||
| "CORTEXA710", | |||
| "FT2000" | |||
| }; | |||
| static char *cpuname_lower[] = { | |||
| @@ -94,12 +105,17 @@ static char *cpuname_lower[] = { | |||
| "tsv110", | |||
| "emag8180", | |||
| "neoversen1", | |||
| "neoversev1", | |||
| "neoversen2", | |||
| "thunderx3t110", | |||
| "vortex", | |||
| "cortexa55", | |||
| "a64fx" | |||
| "a64fx", | |||
| "neoversev1", | |||
| "neoversen2", | |||
| "cortexx1", | |||
| "cortexx2", | |||
| "cortexa510", | |||
| "cortexa710", | |||
| "ft2000" | |||
| }; | |||
| int get_feature(char *search) | |||
| @@ -182,6 +198,14 @@ int detect(void) | |||
| return CPU_NEOVERSEN2; | |||
| else if (strstr(cpu_part, "0xd05")) | |||
| return CPU_CORTEXA55; | |||
| else if (strstr(cpu_part, "0xd46")) | |||
| return CPU_CORTEXA510; | |||
| else if (strstr(cpu_part, "0xd47")) | |||
| return CPU_CORTEXA710; | |||
| else if (strstr(cpu_part, "0xd44")) | |||
| return CPU_CORTEXX1; | |||
| else if (strstr(cpu_part, "0xd4c")) | |||
| return CPU_CORTEXX2; | |||
| } | |||
| // Qualcomm | |||
| else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | |||
| @@ -202,6 +226,13 @@ int detect(void) | |||
| // Fujitsu | |||
| else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) | |||
| return CPU_A64FX; | |||
| // Apple | |||
| else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022")) | |||
| return CPU_VORTEX; | |||
| // Phytium | |||
| else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661") | |||
| || strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663"))) | |||
| return CPU_FT2000; | |||
| } | |||
| p = (char *) NULL ; | |||
| @@ -382,7 +413,24 @@ void get_cpuconfig(void) | |||
| printf("#define DTB_DEFAULT_ENTRIES 48\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| break; | |||
| case CPU_CORTEXA510: | |||
| case CPU_CORTEXA710: | |||
| case CPU_CORTEXX1: | |||
| case CPU_CORTEXX2: | |||
| printf("#define ARMV9\n"); | |||
| printf("#define %s\n", cpuname[d]); | |||
| printf("#define L1_CODE_SIZE 65536\n"); | |||
| printf("#define L1_CODE_LINESIZE 64\n"); | |||
| printf("#define L1_CODE_ASSOCIATIVE 4\n"); | |||
| printf("#define L1_DATA_SIZE 65536\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L1_DATA_ASSOCIATIVE 4\n"); | |||
| printf("#define L2_SIZE 1048576\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define L2_ASSOCIATIVE 8\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| break; | |||
| case CPU_FALKOR: | |||
| printf("#define FALKOR\n"); | |||
| printf("#define L1_CODE_SIZE 65536\n"); | |||
| @@ -469,9 +517,9 @@ void get_cpuconfig(void) | |||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||
| printf("#define DTB_SIZE 4096 \n"); | |||
| break; | |||
| #ifdef __APPLE__ | |||
| case CPU_VORTEX: | |||
| printf("#define VORTEX \n"); | |||
| #ifdef __APPLE__ | |||
| sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); | |||
| printf("#define L1_CODE_SIZE %lld \n",value64); | |||
| sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); | |||
| @@ -480,10 +528,10 @@ void get_cpuconfig(void) | |||
| printf("#define L1_DATA_SIZE %lld \n",value64); | |||
| sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); | |||
| printf("#define L2_SIZE %lld \n",value64); | |||
| #endif | |||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||
| printf("#define DTB_SIZE 4096 \n"); | |||
| break; | |||
| #endif | |||
| case CPU_A64FX: | |||
| printf("#define A64FX\n"); | |||
| printf("#define L1_CODE_SIZE 65535\n"); | |||
| @@ -494,6 +542,16 @@ void get_cpuconfig(void) | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| break; | |||
| case CPU_FT2000: | |||
| printf("#define FT2000\n"); | |||
| printf("#define L1_CODE_SIZE 32768\n"); | |||
| printf("#define L1_DATA_SIZE 32768\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L2_SIZE 33554432\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| break; | |||
| } | |||
| get_cpucount(); | |||
| } | |||
| @@ -1707,8 +1707,18 @@ int get_cpuname(void){ | |||
| if (model == 0xf && stepping < 0xe) | |||
| return CPUTYPE_NANO; | |||
| return CPUTYPE_NEHALEM; | |||
| case 0x7: | |||
| switch (exmodel) { | |||
| case 5: | |||
| if (support_avx2()) | |||
| return CPUTYPE_ZEN; | |||
| else | |||
| return CPUTYPE_DUNNINGTON; | |||
| default: | |||
| return CPUTYPE_NEHALEM; | |||
| } | |||
| default: | |||
| if (family >= 0x7) | |||
| if (family >= 0x8) | |||
| return CPUTYPE_NEHALEM; | |||
| else | |||
| return CPUTYPE_VIAC3; | |||
| @@ -1716,7 +1726,20 @@ int get_cpuname(void){ | |||
| } | |||
| if (vendor == VENDOR_ZHAOXIN){ | |||
| return CPUTYPE_NEHALEM; | |||
| switch (family) { | |||
| case 0x7: | |||
| switch (exmodel) { | |||
| case 5: | |||
| if (support_avx2()) | |||
| return CPUTYPE_ZEN; | |||
| else | |||
| return CPUTYPE_DUNNINGTON; | |||
| default: | |||
| return CPUTYPE_NEHALEM; | |||
| } | |||
| default: | |||
| return CPUTYPE_NEHALEM; | |||
| } | |||
| } | |||
| if (vendor == VENDOR_RISE){ | |||
| @@ -2416,8 +2439,18 @@ int get_coretype(void){ | |||
| if (model == 0xf && stepping < 0xe) | |||
| return CORE_NANO; | |||
| return CORE_NEHALEM; | |||
| case 0x7: | |||
| switch (exmodel) { | |||
| case 5: | |||
| if (support_avx2()) | |||
| return CORE_ZEN; | |||
| else | |||
| return CORE_DUNNINGTON; | |||
| default: | |||
| return CORE_NEHALEM; | |||
| } | |||
| default: | |||
| if (family >= 0x7) | |||
| if (family >= 0x8) | |||
| return CORE_NEHALEM; | |||
| else | |||
| return CORE_VIAC3; | |||
| @@ -2425,7 +2458,20 @@ int get_coretype(void){ | |||
| } | |||
| if (vendor == VENDOR_ZHAOXIN) { | |||
| return CORE_NEHALEM; | |||
| switch (family) { | |||
| case 0x7: | |||
| switch (exmodel) { | |||
| case 5: | |||
| if (support_avx2()) | |||
| return CORE_ZEN; | |||
| else | |||
| return CORE_DUNNINGTON; | |||
| default: | |||
| return CORE_NEHALEM; | |||
| } | |||
| default: | |||
| return CORE_NEHALEM; | |||
| } | |||
| } | |||
| return CORE_UNKNOWN; | |||
| @@ -96,7 +96,7 @@ extern gotoblas_t gotoblas_BARCELONA; | |||
| #endif | |||
| #ifdef DYN_ATOM | |||
| extern gotoblas_t gotoblas_ATOM; | |||
| elif defined(DYN_NEHALEM) | |||
| #elif defined(DYN_NEHALEM) | |||
| #define gotoblas_ATOM gotoblas_NEHALEM | |||
| #else | |||
| #define gotoblas_ATOM gotoblas_PRESCOTT | |||
| @@ -875,14 +875,37 @@ static gotoblas_t *get_coretype(void){ | |||
| if (model == 0xf && stepping < 0xe) | |||
| return &gotoblas_NANO; | |||
| return &gotoblas_NEHALEM; | |||
| case 0x7: | |||
| switch (exmodel) { | |||
| case 5: | |||
| if (support_avx2()) | |||
| return &gotoblas_ZEN; | |||
| else | |||
| return &gotoblas_DUNNINGTON; | |||
| default: | |||
| return &gotoblas_NEHALEM; | |||
| } | |||
| default: | |||
| if (family >= 0x7) | |||
| if (family >= 0x8) | |||
| return &gotoblas_NEHALEM; | |||
| } | |||
| } | |||
| if (vendor == VENDOR_ZHAOXIN) { | |||
| return &gotoblas_NEHALEM; | |||
| switch (family) { | |||
| case 0x7: | |||
| switch (exmodel) { | |||
| case 5: | |||
| if (support_avx2()) | |||
| return &gotoblas_ZEN; | |||
| else | |||
| return &gotoblas_DUNNINGTON; | |||
| default: | |||
| return &gotoblas_NEHALEM; | |||
| } | |||
| default: | |||
| return &gotoblas_NEHALEM; | |||
| } | |||
| } | |||
| return NULL; | |||
| @@ -60,6 +60,9 @@ static char* openblas_config_str="" | |||
| #ifdef USE_OPENMP | |||
| "USE_OPENMP " | |||
| #endif | |||
| #ifdef USE_TLS | |||
| "USE_TLS " | |||
| #endif | |||
| #ifndef DYNAMIC_ARCH | |||
| CHAR_CORENAME | |||
| #endif | |||
| @@ -94,14 +94,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include <sys/sysinfo.h> | |||
| #endif | |||
| #if defined(__x86_64__) || defined(_M_X64) | |||
| #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX2__)) || (defined(__clang__) && __clang_major__ >= 6)) | |||
| #else | |||
| #ifndef NO_AVX512 | |||
| #define NO_AVX512 | |||
| #endif | |||
| #endif | |||
| #endif | |||
| /* #define FORCE_P2 */ | |||
| /* #define FORCE_KATMAI */ | |||
| /* #define FORCE_COPPERMINE */ | |||
| @@ -1240,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa53" | |||
| #define CORENAME "CORTEXA53" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA57 | |||
| @@ -1256,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa57" | |||
| #define CORENAME "CORTEXA57" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA72 | |||
| @@ -1272,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa72" | |||
| #define CORENAME "CORTEXA72" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA73 | |||
| @@ -1288,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa73" | |||
| #define CORENAME "CORTEXA73" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXX1 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "CORTEXX1" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DCORTEXX1 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexx1" | |||
| #define CORENAME "CORTEXX1" | |||
| #endif | |||
| #ifdef FORCE_CORTEXX2 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "CORTEXX2" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DCORTEXX2 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||
| #define LIBNAME "cortexx2" | |||
| #define CORENAME "CORTEXX2" | |||
| #endif | |||
| #ifdef FORCE_CORTEXA510 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "CORTEXA510" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DCORTEXA510 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||
| #define LIBNAME "cortexa510" | |||
| #define CORENAME "CORTEXA510" | |||
| #endif | |||
| #ifdef FORCE_CORTEXA710 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "CORTEXA710" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DCORTEXA710 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||
| #define LIBNAME "cortexa710" | |||
| #define CORENAME "CORTEXA710" | |||
| #endif | |||
| #ifdef FORCE_NEOVERSEN1 | |||
| @@ -1305,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-march=armv8.2-a -mtune=neoverse-n1" | |||
| #define LIBNAME "neoversen1" | |||
| #define CORENAME "NEOVERSEN1" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_NEOVERSEV1 | |||
| @@ -1322,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-march=armv8.4-a -mtune=neoverse-v1" | |||
| #define LIBNAME "neoversev1" | |||
| #define CORENAME "NEOVERSEV1" | |||
| #else | |||
| #endif | |||
| @@ -1340,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-march=armv8.5-a -mtune=neoverse-n2" | |||
| #define LIBNAME "neoversen2" | |||
| #define CORENAME "NEOVERSEN2" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA55 | |||
| @@ -1356,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "cortexa55" | |||
| #define CORENAME "CORTEXA55" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_FALKOR | |||
| @@ -1372,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "falkor" | |||
| #define CORENAME "FALKOR" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_THUNDERX | |||
| @@ -1387,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "thunderx" | |||
| #define CORENAME "THUNDERX" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_THUNDERX2T99 | |||
| @@ -1405,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "thunderx2t99" | |||
| #define CORENAME "THUNDERX2T99" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_TSV110 | |||
| @@ -1421,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "tsv110" | |||
| #define CORENAME "TSV110" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_EMAG8180 | |||
| @@ -1456,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "thunderx3t110" | |||
| #define CORENAME "THUNDERX3T110" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_VORTEX | |||
| @@ -1488,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8" | |||
| #define LIBNAME "a64fx" | |||
| #define CORENAME "A64FX" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_FT2000 | |||
| /* Forced-target description for FT2000. | |||
|  * ARCHCONFIG is a single space-separated list of -D tokens, so every token needs a | |||
|  * separating space; L2_SIZE is 33554432 (32 MiB), the same value the autodetected | |||
|  * CPU_FT2000 configuration reports. */ | |||
| #define ARMV8 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "FT2000" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DFT2000 " \ | |||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ | |||
| "-DL2_SIZE=33554432 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "ft2000" | |||
| #define CORENAME "FT2000" | |||
| #endif | |||
| #ifdef FORCE_ZARCH_GENERIC | |||
| @@ -678,7 +678,7 @@ endif () | |||
| set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) | |||
| endif () | |||
| if (NOT DEFINED SBGEMM_SMALL_K_B0_TT) | |||
| set($SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) | |||
| set(SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16") | |||
| GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16") | |||
| @@ -0,0 +1,216 @@ | |||
| SAMINKERNEL = ../arm/amin.c | |||
| DAMINKERNEL = ../arm/amin.c | |||
| CAMINKERNEL = ../arm/zamin.c | |||
| ZAMINKERNEL = ../arm/zamin.c | |||
| SMAXKERNEL = ../arm/max.c | |||
| DMAXKERNEL = ../arm/max.c | |||
| SMINKERNEL = ../arm/min.c | |||
| DMINKERNEL = ../arm/min.c | |||
| ISAMINKERNEL = ../arm/iamin.c | |||
| IDAMINKERNEL = ../arm/iamin.c | |||
| ICAMINKERNEL = ../arm/izamin.c | |||
| IZAMINKERNEL = ../arm/izamin.c | |||
| ISMAXKERNEL = ../arm/imax.c | |||
| IDMAXKERNEL = ../arm/imax.c | |||
| ISMINKERNEL = ../arm/imin.c | |||
| IDMINKERNEL = ../arm/imin.c | |||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||
| SAMAXKERNEL = amax.S | |||
| DAMAXKERNEL = amax.S | |||
| CAMAXKERNEL = zamax.S | |||
| ZAMAXKERNEL = zamax.S | |||
| SAXPYKERNEL = axpy.S | |||
| DAXPYKERNEL = axpy.S | |||
| CAXPYKERNEL = zaxpy.S | |||
| ZAXPYKERNEL = zaxpy.S | |||
| SROTKERNEL = rot.S | |||
| DROTKERNEL = rot.S | |||
| CROTKERNEL = zrot.S | |||
| ZROTKERNEL = zrot.S | |||
| SSCALKERNEL = scal.S | |||
| DSCALKERNEL = scal.S | |||
| CSCALKERNEL = zscal.S | |||
| ZSCALKERNEL = zscal.S | |||
| SGEMVNKERNEL = gemv_n.S | |||
| DGEMVNKERNEL = gemv_n.S | |||
| CGEMVNKERNEL = zgemv_n.S | |||
| ZGEMVNKERNEL = zgemv_n.S | |||
| SGEMVTKERNEL = gemv_t.S | |||
| DGEMVTKERNEL = gemv_t.S | |||
| CGEMVTKERNEL = zgemv_t.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| SASUMKERNEL = asum.S | |||
| DASUMKERNEL = asum.S | |||
| CASUMKERNEL = casum.S | |||
| ZASUMKERNEL = zasum.S | |||
| SCOPYKERNEL = copy.S | |||
| DCOPYKERNEL = copy.S | |||
| CCOPYKERNEL = copy.S | |||
| ZCOPYKERNEL = copy.S | |||
| SSWAPKERNEL = swap.S | |||
| DSWAPKERNEL = swap.S | |||
| CSWAPKERNEL = swap.S | |||
| ZSWAPKERNEL = swap.S | |||
| ISAMAXKERNEL = iamax.S | |||
| IDAMAXKERNEL = iamax.S | |||
| ICAMAXKERNEL = izamax.S | |||
| IZAMAXKERNEL = izamax.S | |||
| SNRM2KERNEL = nrm2.S | |||
| DNRM2KERNEL = nrm2.S | |||
| CNRM2KERNEL = znrm2.S | |||
| ZNRM2KERNEL = znrm2.S | |||
| DDOTKERNEL = dot.S | |||
| ifneq ($(C_COMPILER), PGI) | |||
| SDOTKERNEL = ../generic/dot.c | |||
| else | |||
| SDOTKERNEL = dot.S | |||
| endif | |||
| ifneq ($(C_COMPILER), PGI) | |||
| CDOTKERNEL = zdot.S | |||
| ZDOTKERNEL = zdot.S | |||
| else | |||
| CDOTKERNEL = ../arm/zdot.c | |||
| ZDOTKERNEL = ../arm/zdot.c | |||
| endif | |||
| DSDOTKERNEL = dot.S | |||
| DGEMM_BETA = dgemm_beta.S | |||
| SGEMM_BETA = sgemm_beta.S | |||
| # Single-precision GEMM/TRMM kernels and their packing (copy) routines. | |||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||
| # The O-side copy routines are selected by the single-precision unroll factor, like | |||
| # the kernels above. They previously used DGEMM_UNROLL_N, which names the same file | |||
| # only while both factors are equal (presumably a copy-paste from the DGEMM section). | |||
| SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||
| SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| @@ -0,0 +1,216 @@ | |||
| SAMINKERNEL = ../arm/amin.c | |||
| DAMINKERNEL = ../arm/amin.c | |||
| CAMINKERNEL = ../arm/zamin.c | |||
| ZAMINKERNEL = ../arm/zamin.c | |||
| SMAXKERNEL = ../arm/max.c | |||
| DMAXKERNEL = ../arm/max.c | |||
| SMINKERNEL = ../arm/min.c | |||
| DMINKERNEL = ../arm/min.c | |||
| ISAMINKERNEL = ../arm/iamin.c | |||
| IDAMINKERNEL = ../arm/iamin.c | |||
| ICAMINKERNEL = ../arm/izamin.c | |||
| IZAMINKERNEL = ../arm/izamin.c | |||
| ISMAXKERNEL = ../arm/imax.c | |||
| IDMAXKERNEL = ../arm/imax.c | |||
| ISMINKERNEL = ../arm/imin.c | |||
| IDMINKERNEL = ../arm/imin.c | |||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||
| SAMAXKERNEL = amax.S | |||
| DAMAXKERNEL = amax.S | |||
| CAMAXKERNEL = zamax.S | |||
| ZAMAXKERNEL = zamax.S | |||
| SAXPYKERNEL = axpy.S | |||
| DAXPYKERNEL = axpy.S | |||
| CAXPYKERNEL = zaxpy.S | |||
| ZAXPYKERNEL = zaxpy.S | |||
| SROTKERNEL = rot.S | |||
| DROTKERNEL = rot.S | |||
| CROTKERNEL = zrot.S | |||
| ZROTKERNEL = zrot.S | |||
| SSCALKERNEL = scal.S | |||
| DSCALKERNEL = scal.S | |||
| CSCALKERNEL = zscal.S | |||
| ZSCALKERNEL = zscal.S | |||
| SGEMVNKERNEL = gemv_n.S | |||
| DGEMVNKERNEL = gemv_n.S | |||
| CGEMVNKERNEL = zgemv_n.S | |||
| ZGEMVNKERNEL = zgemv_n.S | |||
| SGEMVTKERNEL = gemv_t.S | |||
| DGEMVTKERNEL = gemv_t.S | |||
| CGEMVTKERNEL = zgemv_t.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| SASUMKERNEL = asum.S | |||
| DASUMKERNEL = asum.S | |||
| CASUMKERNEL = casum.S | |||
| ZASUMKERNEL = zasum.S | |||
| SCOPYKERNEL = copy.S | |||
| DCOPYKERNEL = copy.S | |||
| CCOPYKERNEL = copy.S | |||
| ZCOPYKERNEL = copy.S | |||
| SSWAPKERNEL = swap.S | |||
| DSWAPKERNEL = swap.S | |||
| CSWAPKERNEL = swap.S | |||
| ZSWAPKERNEL = swap.S | |||
| ISAMAXKERNEL = iamax.S | |||
| IDAMAXKERNEL = iamax.S | |||
| ICAMAXKERNEL = izamax.S | |||
| IZAMAXKERNEL = izamax.S | |||
| SNRM2KERNEL = nrm2.S | |||
| DNRM2KERNEL = nrm2.S | |||
| CNRM2KERNEL = znrm2.S | |||
| ZNRM2KERNEL = znrm2.S | |||
| DDOTKERNEL = dot.S | |||
| ifneq ($(C_COMPILER), PGI) | |||
| SDOTKERNEL = ../generic/dot.c | |||
| else | |||
| SDOTKERNEL = dot.S | |||
| endif | |||
| ifneq ($(C_COMPILER), PGI) | |||
| CDOTKERNEL = zdot.S | |||
| ZDOTKERNEL = zdot.S | |||
| else | |||
| CDOTKERNEL = ../arm/zdot.c | |||
| ZDOTKERNEL = ../arm/zdot.c | |||
| endif | |||
| DSDOTKERNEL = dot.S | |||
| DGEMM_BETA = dgemm_beta.S | |||
| SGEMM_BETA = sgemm_beta.S | |||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| @@ -0,0 +1 @@ | |||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||
| @@ -0,0 +1,216 @@ | |||
| SAMINKERNEL = ../arm/amin.c | |||
| DAMINKERNEL = ../arm/amin.c | |||
| CAMINKERNEL = ../arm/zamin.c | |||
| ZAMINKERNEL = ../arm/zamin.c | |||
| SMAXKERNEL = ../arm/max.c | |||
| DMAXKERNEL = ../arm/max.c | |||
| SMINKERNEL = ../arm/min.c | |||
| DMINKERNEL = ../arm/min.c | |||
| ISAMINKERNEL = ../arm/iamin.c | |||
| IDAMINKERNEL = ../arm/iamin.c | |||
| ICAMINKERNEL = ../arm/izamin.c | |||
| IZAMINKERNEL = ../arm/izamin.c | |||
| ISMAXKERNEL = ../arm/imax.c | |||
| IDMAXKERNEL = ../arm/imax.c | |||
| ISMINKERNEL = ../arm/imin.c | |||
| IDMINKERNEL = ../arm/imin.c | |||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||
| SAMAXKERNEL = amax.S | |||
| DAMAXKERNEL = amax.S | |||
| CAMAXKERNEL = zamax.S | |||
| ZAMAXKERNEL = zamax.S | |||
| SAXPYKERNEL = axpy.S | |||
| DAXPYKERNEL = axpy.S | |||
| CAXPYKERNEL = zaxpy.S | |||
| ZAXPYKERNEL = zaxpy.S | |||
| SROTKERNEL = rot.S | |||
| DROTKERNEL = rot.S | |||
| CROTKERNEL = zrot.S | |||
| ZROTKERNEL = zrot.S | |||
| SSCALKERNEL = scal.S | |||
| DSCALKERNEL = scal.S | |||
| CSCALKERNEL = zscal.S | |||
| ZSCALKERNEL = zscal.S | |||
| SGEMVNKERNEL = gemv_n.S | |||
| DGEMVNKERNEL = gemv_n.S | |||
| CGEMVNKERNEL = zgemv_n.S | |||
| ZGEMVNKERNEL = zgemv_n.S | |||
| SGEMVTKERNEL = gemv_t.S | |||
| DGEMVTKERNEL = gemv_t.S | |||
| CGEMVTKERNEL = zgemv_t.S | |||
| ZGEMVTKERNEL = zgemv_t.S | |||
| SASUMKERNEL = asum.S | |||
| DASUMKERNEL = asum.S | |||
| CASUMKERNEL = casum.S | |||
| ZASUMKERNEL = zasum.S | |||
| SCOPYKERNEL = copy.S | |||
| DCOPYKERNEL = copy.S | |||
| CCOPYKERNEL = copy.S | |||
| ZCOPYKERNEL = copy.S | |||
| SSWAPKERNEL = swap.S | |||
| DSWAPKERNEL = swap.S | |||
| CSWAPKERNEL = swap.S | |||
| ZSWAPKERNEL = swap.S | |||
| ISAMAXKERNEL = iamax.S | |||
| IDAMAXKERNEL = iamax.S | |||
| ICAMAXKERNEL = izamax.S | |||
| IZAMAXKERNEL = izamax.S | |||
| SNRM2KERNEL = nrm2.S | |||
| DNRM2KERNEL = nrm2.S | |||
| CNRM2KERNEL = znrm2.S | |||
| ZNRM2KERNEL = znrm2.S | |||
| DDOTKERNEL = dot.S | |||
| ifneq ($(C_COMPILER), PGI) | |||
| SDOTKERNEL = ../generic/dot.c | |||
| else | |||
| SDOTKERNEL = dot.S | |||
| endif | |||
| ifneq ($(C_COMPILER), PGI) | |||
| CDOTKERNEL = zdot.S | |||
| ZDOTKERNEL = zdot.S | |||
| else | |||
| CDOTKERNEL = ../arm/zdot.c | |||
| ZDOTKERNEL = ../arm/zdot.c | |||
| endif | |||
| DSDOTKERNEL = dot.S | |||
| DGEMM_BETA = dgemm_beta.S | |||
| SGEMM_BETA = sgemm_beta.S | |||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
| @@ -0,0 +1,3 @@ | |||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||
| @@ -1239,7 +1239,6 @@ static void init_parameter(void) { | |||
| #ifdef BUILD_BFLOAT16 | |||
| TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | |||
| TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; | |||
| TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| @@ -1824,6 +1823,13 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p); | |||
| #endif | |||
| #if BUILD_BFLOAT16==1 | |||
| TABLE_NAME.sbgemm_r = (((BUFFER_SIZE - | |||
| ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA | |||
| + TABLE_NAME.align) & ~TABLE_NAME.align) | |||
| ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15); | |||
| #endif | |||
| #if BUILD_SINGLE==1 | |||
| TABLE_NAME.sgemm_r = (((BUFFER_SIZE - | |||
| ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA | |||
| @@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) | |||
| #include <immintrin.h> | |||
| #include "common.h" | |||
| @@ -47,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| _mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N) | |||
| #define MASK_STORE_512(M, N) \ | |||
| result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | |||
| asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \ | |||
| asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \ | |||
| _mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N) | |||
| #endif | |||
| @@ -265,7 +266,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
| int mm = M - i; | |||
| if (!mm) return 0; | |||
| if (mm > 4 || K < 16) { | |||
| register __mmask8 mask asm("k1") = (1UL << mm) - 1; | |||
| register __mmask8 mask = (1UL << mm) - 1; | |||
| for (j = 0; j < n6; j += 6) { | |||
| DECLARE_RESULT_512(0, 0); | |||
| DECLARE_RESULT_512(0, 1); | |||
| @@ -588,3 +589,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
| } | |||
| return 0; | |||
| } | |||
| #else | |||
| #include "../generic/gemm_small_matrix_kernel_nn.c" | |||
| #endif | |||
| @@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| _mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N) | |||
| #define MASK_STORE_512(M, N) \ | |||
| result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | |||
| asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \ | |||
| asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \ | |||
| _mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N) | |||
| #define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | |||
| __m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \ | |||
| @@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
| } | |||
| int mm = M - i; | |||
| if (mm >= 6) { | |||
| register __mmask16 mask asm("k1") = (1UL << mm) - 1; | |||
| register __mmask16 mask = (1UL << mm) - 1; | |||
| for (j = 0; j < n8; j += 8) { | |||
| DECLARE_RESULT_512(0, 0); | |||
| DECLARE_RESULT_512(0, 1); | |||
| @@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) | |||
| #include <immintrin.h> | |||
| #include "common.h" | |||
| @@ -320,3 +321,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
| } | |||
| return 0; | |||
| } | |||
| #else | |||
| #include "../generic/gemm_small_matrix_kernel_tn.c" | |||
| #endif | |||
| @@ -114,10 +114,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc)) | |||
| #define _MASK_STORE_C_2nx16(addr, val0, val1) \ | |||
| asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \ | |||
| asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "k"(mmask)); \ | |||
| asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask)); \ | |||
| asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "k"(mmask)) | |||
| asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \ | |||
| asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "Yk"(mmask)); \ | |||
| asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask)); \ | |||
| asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "Yk"(mmask)) | |||
| #define _REORDER_C_2X(result_0, result_1) { \ | |||
| __m512 tmp0, tmp1; \ | |||
| @@ -154,8 +154,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| asm("vmovups %0, (%1)": : "v"(val0), "r"(addr)); | |||
| #define _MASK_STORE_C_16(addr, val0) \ | |||
| asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \ | |||
| asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask)); | |||
| asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \ | |||
| asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask)); | |||
| #define N_STORE_4X(A, Bx, By) { \ | |||
| _REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \ | |||
| @@ -13,6 +13,8 @@ | |||
| #define ONE 1.e0f | |||
| #define ZERO 0.e0f | |||
| #define SHUFFLE_MAGIC_NO (const int) 0x39 | |||
| #undef STORE16_COMPLETE_RESULT | |||
| #undef STORE16_MASK_COMPLETE_RESULT | |||
| #undef SBGEMM_BLOCK_KERNEL_NN_32x8xK | |||
| @@ -356,7 +358,6 @@ void sbgemm_block_kernel_nn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa | |||
| bfloat16 * B_addr = B; | |||
| float * C_addr = C; | |||
| int SHUFFLE_MAGIC_NO = 0x39; | |||
| BLASLONG tag_k_32x = k & (~31); | |||
| #ifndef ONE_ALPHA | |||
| @@ -465,7 +466,6 @@ void sbgemm_block_kernel_nn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa | |||
| bfloat16 * B_addr = B; | |||
| float * C_addr = C; | |||
| int SHUFFLE_MAGIC_NO = 0x39; | |||
| BLASLONG tag_k_32x = k & (~31); | |||
| #ifndef ONE_ALPHA | |||
| @@ -1192,7 +1192,6 @@ void sbgemm_block_kernel_tn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa | |||
| bfloat16 * B_addr = B; | |||
| float * C_addr = C; | |||
| int SHUFFLE_MAGIC_NO = 0x39; | |||
| BLASLONG tag_k_32x = k & (~31); | |||
| #ifndef ONE_ALPHA | |||
| @@ -1291,7 +1290,6 @@ void sbgemm_block_kernel_tn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa | |||
| bfloat16 * B_addr = B; | |||
| float * C_addr = C; | |||
| int SHUFFLE_MAGIC_NO = 0x39; | |||
| BLASLONG tag_k_32x = k & (~31); | |||
| #ifndef ONE_ALPHA | |||
| @@ -135,7 +135,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){ | |||
| 0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b, | |||
| 0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f, | |||
| }; | |||
| u_int64_t permute_table2[] = { | |||
| uint64_t permute_table2[] = { | |||
| 0x00, 0x01, 0x02, 0x03, 8|0x0, 8|0x1, 8|0x2, 8|0x3, | |||
| 0x04, 0x05, 0x06, 0x07, 8|0x4, 8|0x5, 8|0x6, 8|0x7, | |||
| }; | |||
| @@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) | |||
| #include <immintrin.h> | |||
| #include "common.h" | |||
| @@ -47,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| _mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N) | |||
| #define MASK_STORE_512(M, N) \ | |||
| result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | |||
| asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \ | |||
| asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \ | |||
| _mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N) | |||
| #endif | |||
| @@ -266,7 +267,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
| int mm = M - i; | |||
| if (!mm) return 0; | |||
| if (mm > 8 || K < 32) { | |||
| register __mmask16 mask asm("k1") = (1UL << mm) - 1; | |||
| register __mmask16 mask = (1UL << mm) - 1; | |||
| for (j = 0; j < n6; j += 6) { | |||
| DECLARE_RESULT_512(0, 0); | |||
| DECLARE_RESULT_512(0, 1); | |||
| @@ -610,3 +611,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
| } | |||
| return 0; | |||
| } | |||
| #else | |||
| #include "../generic/gemm_small_matrix_kernel_nn.c" | |||
| #endif | |||
| @@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| _mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N) | |||
| #define MASK_STORE_512(M, N) \ | |||
| result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | |||
| asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \ | |||
| asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \ | |||
| _mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N) | |||
| #define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | |||
| __m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \ | |||
| @@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
| } | |||
| int mm = M - i; | |||
| if (mm >= 12) { | |||
| register __mmask16 mask asm("k1") = (1UL << mm) - 1; | |||
| register __mmask16 mask = (1UL << mm) - 1; | |||
| for (j = 0; j < n8; j += 8) { | |||
| DECLARE_RESULT_512(0, 0); | |||
| DECLARE_RESULT_512(0, 1); | |||
| @@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) | |||
| #include <immintrin.h> | |||
| #include "common.h" | |||
| @@ -314,3 +315,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
| } | |||
| return 0; | |||
| } | |||
| #else | |||
| #include "../generic/gemm_small_matrix_kernel_tn.c" | |||
| #endif | |||
| @@ -452,11 +452,6 @@ | |||
| MOVDDUP(4 * SIZE, A1, a1) | |||
| movsd 0 * SIZE(YY), yy1 | |||
| movhpd 1 * SIZE(YY), yy1 | |||
| movsd 2 * SIZE(YY), yy2 | |||
| movhpd 3 * SIZE(YY), yy2 | |||
| movapd 8 * SIZE(XX), xtemp1 | |||
| movapd 10 * SIZE(XX), xtemp2 | |||
| movapd 12 * SIZE(XX), xtemp3 | |||
| @@ -475,6 +470,12 @@ | |||
| MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) | |||
| ALIGN_3 | |||
| .L12_prep: | |||
| movsd 0 * SIZE(YY), yy1 | |||
| movhpd 1 * SIZE(YY), yy1 | |||
| movsd 2 * SIZE(YY), yy2 | |||
| movhpd 3 * SIZE(YY), yy2 | |||
| .L12: | |||
| movapd xtemp1, xt1 | |||
| mulpd a1, xt1 | |||
| @@ -608,8 +609,6 @@ | |||
| movlpd yy2, 6 * SIZE(YY) | |||
| movhpd yy2, 7 * SIZE(YY) | |||
| movsd 10 * SIZE(YY), yy2 | |||
| movhpd 11 * SIZE(YY), yy2 | |||
| movapd xtemp2, xt1 | |||
| movapd 18 * SIZE(XX), xtemp2 | |||
| @@ -621,8 +620,6 @@ | |||
| movlpd yy1, 4 * SIZE(YY) | |||
| movhpd yy1, 5 * SIZE(YY) | |||
| movsd 8 * SIZE(YY), yy1 | |||
| movhpd 9 * SIZE(YY), yy1 | |||
| subq $-16 * SIZE, XX | |||
| addq $ 8 * SIZE, YY | |||
| @@ -630,7 +627,8 @@ | |||
| addq $ 8 * SIZE, A2 | |||
| decq I | |||
| jg .L12 | |||
| jg .L12_prep | |||
| jmp .L15 | |||
| ALIGN_3 | |||
| .L14: | |||
| @@ -641,7 +639,6 @@ | |||
| jle .L16 | |||
| MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) | |||
| jmp .L15_pastcheck | |||
| .L15: | |||
| movq M, I | |||
| @@ -650,6 +647,11 @@ | |||
| testq $2, I | |||
| jle .L16 | |||
| movsd 0 * SIZE(YY), yy1 | |||
| movhpd 1 * SIZE(YY), yy1 | |||
| movsd 2 * SIZE(YY), yy2 | |||
| movhpd 3 * SIZE(YY), yy2 | |||
| .L15_pastcheck: | |||
| movapd xtemp1, xt1 | |||
| mulpd a1, xt1 | |||
| @@ -705,8 +707,6 @@ | |||
| movlpd yy2, 2 * SIZE(YY) | |||
| movhpd yy2, 3 * SIZE(YY) | |||
| movsd 6 * SIZE(YY), yy2 | |||
| movhpd 7 * SIZE(YY), yy2 | |||
| movapd xtemp2, xt1 | |||
| movapd 10 * SIZE(XX), xtemp2 | |||
| @@ -717,8 +717,6 @@ | |||
| movlpd yy1, 0 * SIZE(YY) | |||
| movhpd yy1, 1 * SIZE(YY) | |||
| movsd 4 * SIZE(YY), yy1 | |||
| movhpd 5 * SIZE(YY), yy1 | |||
| addq $4 * SIZE, YY | |||
| addq $4 * SIZE, A1 | |||
| @@ -731,6 +729,9 @@ | |||
| MOVDDUP(1 * SIZE, A1, a2) | |||
| movsd 0 * SIZE(YY), yy1 | |||
| movhpd 1 * SIZE(YY), yy1 | |||
| movapd xtemp1, xt1 | |||
| mulpd a1, xt1 | |||
| mulpd atemp1, a1 | |||
| @@ -2,9 +2,9 @@ add_subdirectory(SRC) | |||
| if(BUILD_TESTING) | |||
| add_subdirectory(TESTING) | |||
| endif() | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/blas.pc @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc @ONLY) | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/blas.pc | |||
| ${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc | |||
| DESTINATION ${PKG_CONFIG_DIR} | |||
| COMPONENT Development | |||
| ) | |||
| @@ -97,10 +97,10 @@ if(BUILD_COMPLEX16) | |||
| endif() | |||
| list(REMOVE_DUPLICATES SOURCES) | |||
| add_library(blas ${SOURCES}) | |||
| add_library(${BLASLIB} ${SOURCES}) | |||
| set_target_properties( | |||
| blas PROPERTIES | |||
| ${BLASLIB} PROPERTIES | |||
| VERSION ${LAPACK_VERSION} | |||
| SOVERSION ${LAPACK_MAJOR_VERSION} | |||
| ) | |||
| lapack_install_library(blas) | |||
| lapack_install_library(${BLASLIB}) | |||
| @@ -2,7 +2,7 @@ macro(add_blas_test name src) | |||
| get_filename_component(baseNAME ${src} NAME_WE) | |||
| set(TEST_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${baseNAME}.in") | |||
| add_executable(${name} ${src}) | |||
| target_link_libraries(${name} blas) | |||
| target_link_libraries(${name} ${BLASLIB}) | |||
| if(EXISTS "${TEST_INPUT}") | |||
| add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}" | |||
| -DTEST=$<TARGET_FILE:${name}> | |||
| @@ -5,4 +5,4 @@ Name: BLAS | |||
| Description: FORTRAN reference implementation of BLAS Basic Linear Algebra Subprograms | |||
| Version: @LAPACK_VERSION@ | |||
| URL: http://www.netlib.org/blas/ | |||
| Libs: -L${libdir} -lblas | |||
| Libs: -L${libdir} -l@BLASLIB@ | |||
| @@ -1,7 +1,7 @@ | |||
| message(STATUS "CBLAS enable") | |||
| enable_language(C) | |||
| set(LAPACK_INSTALL_EXPORT_NAME cblas-targets) | |||
| set(LAPACK_INSTALL_EXPORT_NAME ${CBLASLIB}-targets) | |||
| # Create a header file cblas.h for the routines called in my C programs | |||
| include(FortranCInterface) | |||
| @@ -42,15 +42,15 @@ if(BUILD_TESTING) | |||
| endif() | |||
| if(NOT BLAS_FOUND) | |||
| set(ALL_TARGETS ${ALL_TARGETS} blas) | |||
| set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB}) | |||
| endif() | |||
| # Export cblas targets from the | |||
| # install tree, if any. | |||
| set(_cblas_config_install_guard_target "") | |||
| if(ALL_TARGETS) | |||
| install(EXPORT cblas-targets | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||
| install(EXPORT ${CBLASLIB}-targets | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION} | |||
| COMPONENT Development | |||
| ) | |||
| # Choose one of the cblas targets to use as a guard for | |||
| @@ -61,7 +61,7 @@ endif() | |||
| # Export cblas targets from the build tree, if any. | |||
| set(_cblas_config_build_guard_target "") | |||
| if(ALL_TARGETS) | |||
| export(TARGETS ${ALL_TARGETS} FILE cblas-targets.cmake) | |||
| export(TARGETS ${ALL_TARGETS} FILE ${CBLASLIB}-targets.cmake) | |||
| # Choose one of the cblas targets to use as a guard | |||
| # for cblas-config.cmake to load targets from the build tree. | |||
| @@ -69,26 +69,26 @@ if(ALL_TARGETS) | |||
| endif() | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-version.cmake.in | |||
| ${LAPACK_BINARY_DIR}/cblas-config-version.cmake @ONLY) | |||
| ${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-build.cmake.in | |||
| ${LAPACK_BINARY_DIR}/cblas-config.cmake @ONLY) | |||
| ${LAPACK_BINARY_DIR}/${CBLASLIB}-config.cmake @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc @ONLY) | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc | |||
| ${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc | |||
| DESTINATION ${PKG_CONFIG_DIR} | |||
| ) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake @ONLY) | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake @ONLY) | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake | |||
| ${LAPACK_BINARY_DIR}/cblas-config-version.cmake | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake | |||
| ${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION} | |||
| ) | |||
| #install(EXPORT cblas-targets | |||
| # DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||
| #install(EXPORT ${CBLASLIB}-targets | |||
| # DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION} | |||
| # COMPONENT Development | |||
| # ) | |||
| @@ -5,6 +5,6 @@ Name: CBLAS | |||
| Description: C Standard Interface to BLAS Basic Linear Algebra Subprograms | |||
| Version: @LAPACK_VERSION@ | |||
| URL: http://www.netlib.org/blas/#_cblas | |||
| Libs: -L${libdir} -lcblas | |||
| Libs: -L${libdir} -l@CBLASLIB@ | |||
| Cflags: -I${includedir} | |||
| Requires.private: blas | |||
| Requires.private: @BLASLIB@ | |||
| @@ -4,11 +4,11 @@ find_package(LAPACK NO_MODULE) | |||
| # Load lapack targets from the build tree, including lapacke targets. | |||
| if(NOT TARGET lapacke) | |||
| include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||
| include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake") | |||
| endif() | |||
| # Report cblas header search locations from build tree. | |||
| set(CBLAS_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | |||
| # Report cblas libraries. | |||
| set(CBLAS_LIBRARIES cblas) | |||
| set(CBLAS_LIBRARIES @CBLASLIB@) | |||
| @@ -5,19 +5,19 @@ get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) | |||
| get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) | |||
| # Load the LAPACK package with which we were built. | |||
| set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@") | |||
| set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@") | |||
| find_package(LAPACK NO_MODULE) | |||
| # Load cblas targets from the install tree. | |||
| if(NOT TARGET cblas) | |||
| include(${_CBLAS_SELF_DIR}/cblas-targets.cmake) | |||
| if(NOT TARGET @CBLASLIB@) | |||
| include(${_CBLAS_SELF_DIR}/@CBLASLIB@-targets.cmake) | |||
| endif() | |||
| # Report cblas header search locations. | |||
| set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include) | |||
| # Report cblas libraries. | |||
| set(CBLAS_LIBRARIES cblas) | |||
| set(CBLAS_LIBRARIES @CBLASLIB@) | |||
| unset(_CBLAS_PREFIX) | |||
| unset(_CBLAS_SELF_DIR) | |||
| @@ -1,8 +1,8 @@ | |||
| add_executable(xexample1_CBLAS cblas_example1.c) | |||
| add_executable(xexample2_CBLAS cblas_example2.c) | |||
| target_link_libraries(xexample1_CBLAS cblas) | |||
| target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES}) | |||
| target_link_libraries(xexample1_CBLAS ${CBLASLIB}) | |||
| target_link_libraries(xexample2_CBLAS ${CBLASLIB} ${BLAS_LIBRARIES}) | |||
| add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS) | |||
| add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS) | |||
| @@ -11,7 +11,7 @@ int main ( ) | |||
| double *a, *x, *y; | |||
| double alpha, beta; | |||
| int m, n, lda, incx, incy, i; | |||
| CBLAS_INDEX m, n, lda, incx, incy, i; | |||
| Layout = CblasColMajor; | |||
| transa = CblasNoTrans; | |||
| @@ -9,7 +9,7 @@ | |||
| int main (int argc, char **argv ) | |||
| { | |||
| int rout=-1,info=0,m,n,k,lda,ldb,ldc; | |||
| CBLAS_INDEX rout=-1,info=0,m,n,k,lda,ldb,ldc; | |||
| double A[2] = {0.0,0.0}, | |||
| B[2] = {0.0,0.0}, | |||
| C[2] = {0.0,0.0}, | |||
| @@ -1,6 +1,7 @@ | |||
| #ifndef CBLAS_H | |||
| #define CBLAS_H | |||
| #include <stddef.h> | |||
| #include <stdint.h> | |||
| #ifdef __cplusplus | |||
| @@ -11,9 +12,9 @@ extern "C" { /* Assume C declarations for C++ */ | |||
| * Enumerated and derived types | |||
| */ | |||
| #ifdef WeirdNEC | |||
| #define CBLAS_INDEX long | |||
| #define CBLAS_INDEX int64_t | |||
| #else | |||
| #define CBLAS_INDEX int | |||
| #define CBLAS_INDEX int32_t | |||
| #endif | |||
| typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT; | |||
| @@ -9,6 +9,8 @@ | |||
| #ifndef CBLAS_F77_H | |||
| #define CBLAS_F77_H | |||
| #include <stdint.h> | |||
| #ifdef CRAY | |||
| #include <fortran.h> | |||
| #define F77_CHAR _fcd | |||
| @@ -17,8 +19,12 @@ | |||
| #define F77_STRLEN(a) (_fcdlen) | |||
| #endif | |||
| #ifndef F77_INT | |||
| #ifdef WeirdNEC | |||
| #define F77_INT long | |||
| #define F77_INT int64_t | |||
| #else | |||
| #define F77_INT int32_t | |||
| #endif | |||
| #endif | |||
| #ifdef F77_CHAR | |||
| @@ -113,16 +113,16 @@ if(BUILD_COMPLEX16) | |||
| endif() | |||
| list(REMOVE_DUPLICATES SOURCES) | |||
| add_library(cblas ${SOURCES}) | |||
| add_library(${CBLASLIB} ${SOURCES}) | |||
| set_target_properties( | |||
| cblas PROPERTIES | |||
| ${CBLASLIB} PROPERTIES | |||
| LINKER_LANGUAGE C | |||
| VERSION ${LAPACK_VERSION} | |||
| SOVERSION ${LAPACK_MAJOR_VERSION} | |||
| ) | |||
| target_include_directories(cblas PUBLIC | |||
| target_include_directories(${CBLASLIB} PUBLIC | |||
| $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> | |||
| $<INSTALL_INTERFACE:include> | |||
| ) | |||
| target_link_libraries(cblas PRIVATE ${BLAS_LIBRARIES}) | |||
| lapack_install_library(cblas) | |||
| target_link_libraries(${CBLASLIB} PRIVATE ${BLAS_LIBRARIES}) | |||
| lapack_install_library(${CBLASLIB}) | |||
| @@ -52,9 +52,9 @@ if(BUILD_SINGLE) | |||
| add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
| add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
| target_link_libraries(xscblat1 cblas) | |||
| target_link_libraries(xscblat2 cblas) | |||
| target_link_libraries(xscblat3 cblas) | |||
| target_link_libraries(xscblat1 ${CBLASLIB}) | |||
| target_link_libraries(xscblat2 ${CBLASLIB}) | |||
| target_link_libraries(xscblat3 ${CBLASLIB}) | |||
| add_cblas_test(stest1.out "" xscblat1) | |||
| add_cblas_test(stest2.out sin2 xscblat2) | |||
| @@ -66,9 +66,9 @@ if(BUILD_DOUBLE) | |||
| add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
| add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
| target_link_libraries(xdcblat1 cblas) | |||
| target_link_libraries(xdcblat2 cblas) | |||
| target_link_libraries(xdcblat3 cblas) | |||
| target_link_libraries(xdcblat1 ${CBLASLIB}) | |||
| target_link_libraries(xdcblat2 ${CBLASLIB}) | |||
| target_link_libraries(xdcblat3 ${CBLASLIB}) | |||
| add_cblas_test(dtest1.out "" xdcblat1) | |||
| add_cblas_test(dtest2.out din2 xdcblat2) | |||
| @@ -80,9 +80,9 @@ if(BUILD_COMPLEX) | |||
| add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
| add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
| target_link_libraries(xccblat1 cblas ${BLAS_LIBRARIES}) | |||
| target_link_libraries(xccblat2 cblas) | |||
| target_link_libraries(xccblat3 cblas) | |||
| target_link_libraries(xccblat1 ${CBLASLIB} ${BLAS_LIBRARIES}) | |||
| target_link_libraries(xccblat2 ${CBLASLIB}) | |||
| target_link_libraries(xccblat3 ${CBLASLIB}) | |||
| add_cblas_test(ctest1.out "" xccblat1) | |||
| add_cblas_test(ctest2.out cin2 xccblat2) | |||
| @@ -94,9 +94,9 @@ if(BUILD_COMPLEX16) | |||
| add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
| add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
| target_link_libraries(xzcblat1 cblas) | |||
| target_link_libraries(xzcblat2 cblas) | |||
| target_link_libraries(xzcblat3 cblas) | |||
| target_link_libraries(xzcblat1 ${CBLASLIB}) | |||
| target_link_libraries(xzcblat2 ${CBLASLIB}) | |||
| target_link_libraries(xzcblat3 ${CBLASLIB}) | |||
| add_cblas_test(ztest1.out "" xzcblat1) | |||
| add_cblas_test(ztest2.out zin2 xzcblat2) | |||
| @@ -14,6 +14,19 @@ macro( CheckLAPACKCompilerFlags ) | |||
| set( FPE_EXIT FALSE ) | |||
| # FORTRAN ILP default | |||
| if ( FORTRAN_ILP ) | |||
| if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" ) | |||
| if ( WIN32 ) | |||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} /integer-size:64") | |||
| else () | |||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -integer-size 64") | |||
| endif() | |||
| else() | |||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-integer-8") | |||
| endif() | |||
| endif() | |||
| # GNU Fortran | |||
| if( CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" ) | |||
| if( "${CMAKE_Fortran_FLAGS}" MATCHES "-ffpe-trap=[izoupd]") | |||
| @@ -1,7 +1,7 @@ | |||
| # Load lapack targets from the build tree if necessary. | |||
| set(_LAPACK_TARGET "@_lapack_config_build_guard_target@") | |||
| if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | |||
| include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||
| include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake") | |||
| endif() | |||
| unset(_LAPACK_TARGET) | |||
| @@ -4,7 +4,7 @@ get_filename_component(_LAPACK_SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) | |||
| # Load lapack targets from the install tree if necessary. | |||
| set(_LAPACK_TARGET "@_lapack_config_install_guard_target@") | |||
| if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | |||
| include("${_LAPACK_SELF_DIR}/lapack-targets.cmake") | |||
| include("${_LAPACK_SELF_DIR}/@LAPACKLIB@-targets.cmake") | |||
| endif() | |||
| unset(_LAPACK_TARGET) | |||
| @@ -44,6 +44,24 @@ endif() | |||
| # By default static library | |||
| option(BUILD_SHARED_LIBS "Build shared libraries" OFF) | |||
| # By default build index32 library | |||
| option(BUILD_INDEX64 "Build Index-64 API libraries" OFF) | |||
| if(BUILD_INDEX64) | |||
| set(BLASLIB "blas64") | |||
| set(CBLASLIB "cblas64") | |||
| set(LAPACKLIB "lapack64") | |||
| set(LAPACKELIB "lapacke64") | |||
| set(TMGLIB "tmglib64") | |||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWeirdNEC -DLAPACK_ILP64 -DHAVE_LAPACK_CONFIG_H") | |||
| set(FORTRAN_ILP TRUE) | |||
| else() | |||
| set(BLASLIB "blas") | |||
| set(CBLASLIB "cblas") | |||
| set(LAPACKLIB "lapack") | |||
| set(LAPACKELIB "lapacke") | |||
| set(TMGLIB "tmglib") | |||
| endif() | |||
| include(GNUInstallDirs) | |||
| # Updated OSX RPATH settings | |||
| @@ -73,10 +91,10 @@ include(PreventInBuildInstalls) | |||
| if(UNIX) | |||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel) | |||
| list(APPEND CMAKE_Fortran_FLAGS "-fp-model strict") | |||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict") | |||
| endif() | |||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL XL) | |||
| list(APPEND CMAKE_Fortran_FLAGS "-qnosave -qstrict=none") | |||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none") | |||
| endif() | |||
| # Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler. | |||
| # This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin | |||
| @@ -112,7 +130,7 @@ endif() | |||
| # -------------------------------------------------- | |||
| set(LAPACK_INSTALL_EXPORT_NAME lapack-targets) | |||
| set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKLIB}-targets) | |||
| macro(lapack_install_library lib) | |||
| install(TARGETS ${lib} | |||
| @@ -220,7 +238,7 @@ endif() | |||
| if(NOT BLAS_FOUND) | |||
| message(STATUS "Using supplied NETLIB BLAS implementation") | |||
| add_subdirectory(BLAS) | |||
| set(BLAS_LIBRARIES blas) | |||
| set(BLAS_LIBRARIES ${BLASLIB}) | |||
| else() | |||
| set(CMAKE_EXE_LINKER_FLAGS | |||
| "${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}" | |||
| @@ -279,7 +297,7 @@ endif() | |||
| # Neither user specified or optimized LAPACK libraries can be used | |||
| if(NOT LATESTLAPACK_FOUND) | |||
| message(STATUS "Using supplied NETLIB LAPACK implementation") | |||
| set(LAPACK_LIBRARIES lapack) | |||
| set(LAPACK_LIBRARIES ${LAPACKLIB}) | |||
| add_subdirectory(SRC) | |||
| else() | |||
| set(CMAKE_EXE_LINKER_FLAGS | |||
| @@ -371,23 +389,23 @@ include(CPack) | |||
| # -------------------------------------------------- | |||
| if(NOT BLAS_FOUND) | |||
| set(ALL_TARGETS ${ALL_TARGETS} blas) | |||
| set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB}) | |||
| endif() | |||
| if(NOT LATESTLAPACK_FOUND) | |||
| set(ALL_TARGETS ${ALL_TARGETS} lapack) | |||
| set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKLIB}) | |||
| endif() | |||
| if(BUILD_TESTING OR LAPACKE_WITH_TMG) | |||
| set(ALL_TARGETS ${ALL_TARGETS} tmglib) | |||
| set(ALL_TARGETS ${ALL_TARGETS} ${TMGLIB}) | |||
| endif() | |||
| # Export lapack targets, not including lapacke, from the | |||
| # install tree, if any. | |||
| set(_lapack_config_install_guard_target "") | |||
| if(ALL_TARGETS) | |||
| install(EXPORT lapack-targets | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||
| install(EXPORT ${LAPACKLIB}-targets | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION} | |||
| COMPONENT Development | |||
| ) | |||
| @@ -398,18 +416,18 @@ endif() | |||
| # Include cblas in targets exported from the build tree. | |||
| if(CBLAS) | |||
| set(ALL_TARGETS ${ALL_TARGETS} cblas) | |||
| set(ALL_TARGETS ${ALL_TARGETS} ${CBLASLIB}) | |||
| endif() | |||
| # Include lapacke in targets exported from the build tree. | |||
| if(LAPACKE) | |||
| set(ALL_TARGETS ${ALL_TARGETS} lapacke) | |||
| set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKELIB}) | |||
| endif() | |||
| # Export lapack and lapacke targets from the build tree, if any. | |||
| set(_lapack_config_build_guard_target "") | |||
| if(ALL_TARGETS) | |||
| export(TARGETS ${ALL_TARGETS} FILE lapack-targets.cmake) | |||
| export(TARGETS ${ALL_TARGETS} FILE ${LAPACKLIB}-targets.cmake) | |||
| # Choose one of the lapack or lapacke targets to use as a guard | |||
| # for lapack-config.cmake to load targets from the build tree. | |||
| @@ -417,30 +435,30 @@ if(ALL_TARGETS) | |||
| endif() | |||
| configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-build.cmake.in | |||
| ${LAPACK_BINARY_DIR}/lapack-config.cmake @ONLY) | |||
| ${LAPACK_BINARY_DIR}/${LAPACKLIB}-config.cmake @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc @ONLY) | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc | |||
| ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc | |||
| DESTINATION ${PKG_CONFIG_DIR} | |||
| COMPONENT Development | |||
| ) | |||
| configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in | |||
| ${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake @ONLY) | |||
| ${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake @ONLY) | |||
| include(CMakePackageConfigHelpers) | |||
| write_basic_package_version_file( | |||
| ${LAPACK_BINARY_DIR}/lapack-config-version.cmake | |||
| ${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake | |||
| VERSION ${LAPACK_VERSION} | |||
| COMPATIBILITY SameMajorVersion | |||
| ) | |||
| install(FILES | |||
| ${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake | |||
| ${LAPACK_BINARY_DIR}/lapack-config-version.cmake | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||
| ${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake | |||
| ${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION} | |||
| COMPONENT Development | |||
| ) | |||
| @@ -1,7 +1,7 @@ | |||
| message(STATUS "LAPACKE enable") | |||
| enable_language(C) | |||
| set(LAPACK_INSTALL_EXPORT_NAME lapacke-targets) | |||
| set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKELIB}-targets) | |||
| # Create a header file lapacke_mangling.h for the routines called in my C programs | |||
| include(FortranCInterface) | |||
| @@ -72,28 +72,28 @@ if(LAPACKE_WITH_TMG) | |||
| endif() | |||
| list(APPEND SOURCES ${UTILS}) | |||
| add_library(lapacke ${SOURCES}) | |||
| add_library(${LAPACKELIB} ${SOURCES}) | |||
| set_target_properties( | |||
| lapacke PROPERTIES | |||
| ${LAPACKELIB} PROPERTIES | |||
| LINKER_LANGUAGE C | |||
| VERSION ${LAPACK_VERSION} | |||
| SOVERSION ${LAPACK_MAJOR_VERSION} | |||
| ) | |||
| target_include_directories(lapacke PUBLIC | |||
| target_include_directories(${LAPACKELIB} PUBLIC | |||
| $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> | |||
| $<INSTALL_INTERFACE:include> | |||
| ) | |||
| if(WIN32 AND NOT UNIX) | |||
| target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE) | |||
| target_compile_definitions(${LAPACKELIB} PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE) | |||
| message(STATUS "Windows BUILD") | |||
| endif() | |||
| if(LAPACKE_WITH_TMG) | |||
| target_link_libraries(lapacke PRIVATE tmglib) | |||
| target_link_libraries(${LAPACKELIB} PRIVATE ${TMGLIB}) | |||
| endif() | |||
| target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES}) | |||
| target_link_libraries(${LAPACKELIB} PRIVATE ${LAPACK_LIBRARIES}) | |||
| lapack_install_library(lapacke) | |||
| lapack_install_library(${LAPACKELIB}) | |||
| install( | |||
| FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h | |||
| DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||
| @@ -105,28 +105,28 @@ if(BUILD_TESTING) | |||
| endif() | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc @ONLY) | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc | |||
| ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc | |||
| DESTINATION ${PKG_CONFIG_DIR} | |||
| COMPONENT Development | |||
| ) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in | |||
| ${LAPACK_BINARY_DIR}/lapacke-config-version.cmake @ONLY) | |||
| ${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-build.cmake.in | |||
| ${LAPACK_BINARY_DIR}/lapacke-config.cmake @ONLY) | |||
| ${LAPACK_BINARY_DIR}/${LAPACKELIB}-config.cmake @ONLY) | |||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-install.cmake.in | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake @ONLY) | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake @ONLY) | |||
| install(FILES | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake | |||
| ${LAPACK_BINARY_DIR}/lapacke-config-version.cmake | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake | |||
| ${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION} | |||
| COMPONENT Development | |||
| ) | |||
| install(EXPORT lapacke-targets | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||
| install(EXPORT ${LAPACKELIB}-targets | |||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION} | |||
| COMPONENT Development | |||
| ) | |||
| @@ -3,8 +3,8 @@ set(LAPACK_DIR "@LAPACK_BINARY_DIR@") | |||
| find_package(LAPACK NO_MODULE) | |||
| # Load lapack targets from the build tree, including lapacke targets. | |||
| if(NOT TARGET lapacke) | |||
| include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||
| if(NOT TARGET @LAPACKELIB@) | |||
| include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake") | |||
| endif() | |||
| # Hint for project building against lapack | |||
| @@ -14,4 +14,4 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||
| set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | |||
| # Report lapacke libraries. | |||
| set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||
| set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES}) | |||
| @@ -5,12 +5,12 @@ get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH) | |||
| get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH) | |||
| # Load the LAPACK package with which we were built. | |||
| # The directory name is built from @LAPACKLIB@ (lapack, or lapack64 for an | |||
| # index-64 build) so that it matches the directory the LAPACK config files | |||
| # are installed into: <libdir>/cmake/<LAPACKLIB>-<version>. | |||
| set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@") | |||
| set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@") | |||
| find_package(LAPACK NO_MODULE) | |||
| # Load lapacke targets from the install tree. | |||
| if(NOT TARGET lapacke) | |||
| include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake) | |||
| if(NOT TARGET @LAPACKELIB@) | |||
| include(${_LAPACKE_SELF_DIR}/@LAPACKELIB@-targets.cmake) | |||
| endif() | |||
| # Hint for project building against lapack | |||
| @@ -20,7 +20,7 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||
| set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include) | |||
| # Report lapacke libraries. | |||
| set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||
| set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES}) | |||
| unset(_LAPACKE_PREFIX) | |||
| unset(_LAPACKE_SELF_DIR) | |||
| @@ -3,10 +3,10 @@ add_executable(xexample_DGESV_colmajor example_DGESV_colmajor.c lapacke_example_ | |||
| add_executable(xexample_DGELS_rowmajor example_DGELS_rowmajor.c lapacke_example_aux.c lapacke_example_aux.h) | |||
| add_executable(xexample_DGELS_colmajor example_DGELS_colmajor.c lapacke_example_aux.c lapacke_example_aux.h) | |||
| target_link_libraries(xexample_DGESV_rowmajor lapacke) | |||
| target_link_libraries(xexample_DGESV_colmajor lapacke) | |||
| target_link_libraries(xexample_DGELS_rowmajor lapacke) | |||
| target_link_libraries(xexample_DGELS_colmajor lapacke) | |||
| target_link_libraries(xexample_DGESV_rowmajor ${LAPACKELIB}) | |||
| target_link_libraries(xexample_DGESV_colmajor ${LAPACKELIB}) | |||
| target_link_libraries(xexample_DGELS_rowmajor ${LAPACKELIB}) | |||
| target_link_libraries(xexample_DGELS_colmajor ${LAPACKELIB}) | |||
| add_test(example_DGESV_rowmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_rowmajor) | |||
| add_test(example_DGESV_colmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_colmajor) | |||
| @@ -49,12 +49,13 @@ extern "C" { | |||
| #endif /* __cplusplus */ | |||
| #include <stdlib.h> | |||
| #include <stdint.h> | |||
| #ifndef lapack_int | |||
| #if defined(LAPACK_ILP64) | |||
| #define lapack_int long | |||
| #define lapack_int int64_t | |||
| #else | |||
| #define lapack_int int | |||
| #define lapack_int int32_t | |||
| #endif | |||
| #endif | |||
| @@ -67,7 +67,11 @@ extern "C" { | |||
| void LAPACKE_xerbla( const char *name, lapack_int info ); | |||
| /* Compare two chars (case-insensitive) */ | |||
| lapack_logical LAPACKE_lsame( char ca, char cb ); | |||
| lapack_logical LAPACKE_lsame( char ca, char cb ) | |||
| #if defined __GNUC__ | |||
| __attribute__((const)) | |||
| #endif | |||
| ; | |||
| /* Functions to convert column-major to row-major 2d arrays and vice versa. */ | |||
| void LAPACKE_cgb_trans( int matrix_layout, lapack_int m, lapack_int n, | |||
| @@ -5,6 +5,6 @@ Name: LAPACKE | |||
| Description: C Standard Interface to LAPACK Linear Algebra PACKage | |||
| Version: @LAPACK_VERSION@ | |||
| URL: http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack | |||
| Libs: -L${libdir} -llapacke | |||
| Libs: -L${libdir} -l@LAPACKELIB@ | |||
| Cflags: -I${includedir} | |||
| Requires.private: lapack | |||
| Requires.private: @LAPACKLIB@ | |||
| @@ -500,21 +500,21 @@ if(BUILD_COMPLEX16) | |||
| endif() | |||
| list(REMOVE_DUPLICATES SOURCES) | |||
| add_library(lapack ${SOURCES}) | |||
| add_library(${LAPACKLIB} ${SOURCES}) | |||
| set_target_properties( | |||
| lapack PROPERTIES | |||
| ${LAPACKLIB} PROPERTIES | |||
| VERSION ${LAPACK_VERSION} | |||
| SOVERSION ${LAPACK_MAJOR_VERSION} | |||
| ) | |||
| if(USE_XBLAS) | |||
| target_link_libraries(lapack PRIVATE ${XBLAS_LIBRARY}) | |||
| target_link_libraries(${LAPACKLIB} PRIVATE ${XBLAS_LIBRARY}) | |||
| endif() | |||
| target_link_libraries(lapack PRIVATE ${BLAS_LIBRARIES}) | |||
| target_link_libraries(${LAPACKLIB} PRIVATE ${BLAS_LIBRARIES}) | |||
| if(_is_coverage_build) | |||
| target_link_libraries(lapack PRIVATE gcov) | |||
| add_coverage(lapack) | |||
| target_link_libraries(${LAPACKLIB} PRIVATE gcov) | |||
| add_coverage(${LAPACKLIB}) | |||
| endif() | |||
| lapack_install_library(lapack) | |||
| lapack_install_library(${LAPACKLIB}) | |||
| @@ -47,6 +47,6 @@ if(BUILD_COMPLEX16) | |||
| endif() | |||
| list(REMOVE_DUPLICATES SOURCES) | |||
| add_library(tmglib ${SOURCES}) | |||
| target_link_libraries(tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) | |||
| lapack_install_library(tmglib) | |||
| add_library(${TMGLIB} ${SOURCES}) | |||
| target_link_libraries(${TMGLIB} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) | |||
| lapack_install_library(${TMGLIB}) | |||
| @@ -3128,9 +3128,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SYMV_P 16 | |||
| #if defined(CORTEXA57) || \ | |||
| #if defined(CORTEXA57) || defined(CORTEXX1) || \ | |||
| defined(CORTEXA72) || defined(CORTEXA73) || \ | |||
| defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) | |||
| defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000) | |||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| @@ -3147,7 +3147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| /*FIXME: this should be using the cache size, but there is currently no easy way to | |||
| query that on ARM. So if getarch counted more than 8 cores we simply assume the host | |||
| is a big desktop or server with abundant cache rather than a phone or embedded device */ | |||
| #if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) | |||
| #if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1) | |||
| #define SGEMM_DEFAULT_P 512 | |||
| #define DGEMM_DEFAULT_P 256 | |||
| #define CGEMM_DEFAULT_P 256 | |||
| @@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d | |||
| #define CGEMM_DEFAULT_R 4096 | |||
| #define ZGEMM_DEFAULT_R 4096 | |||
| #elif defined(ARMV8SVE) || defined(A64FX) | |||
| #elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2) | |||
| /* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". | |||
| Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */ | |||
| @@ -3423,8 +3423,8 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout | |||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| #define DGEMM_DEFAULT_UNROLL_M 4 | |||
| #define DGEMM_DEFAULT_UNROLL_N 8 | |||
| #define DGEMM_DEFAULT_UNROLL_M 8 | |||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||
| #define CGEMM_DEFAULT_UNROLL_M 8 | |||
| #define CGEMM_DEFAULT_UNROLL_N 4 | |||
| @@ -115,7 +115,7 @@ | |||
| #define INCLUDE_CTGSYL INCLUDE_XTGSYL | |||
| #define INCLUDE_ZTGSYL INCLUDE_XTGSYL | |||
| #define INCLUDE_XGEMMT 0 | |||
| #define INCLUDE_XGEMMT 1 | |||
| #define INCLUDE_SGEMMT INCLUDE_XGEMMT | |||
| #define INCLUDE_DGEMMT INCLUDE_XGEMMT | |||
| #define INCLUDE_CGEMMT INCLUDE_XGEMMT | |||
| @@ -566,7 +566,8 @@ void LAPACK(sgemmt)( | |||
| const float *B, const blasint *ldB, | |||
| const float *beta, float *C, const blasint *ldC | |||
| ) { | |||
| RELAPACK_sgemmt(uplo, n, A, ldA, info); | |||
| blasint info; | |||
| RELAPACK_sgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info); | |||
| } | |||
| #endif | |||
| @@ -578,7 +579,8 @@ void LAPACK(dgemmt)( | |||
| const double *B, const blasint *ldB, | |||
| const double *beta, double *C, const blasint *ldC | |||
| ) { | |||
| RELAPACK_dgemmt(uplo, n, A, ldA, info); | |||
| blasint info; | |||
| RELAPACK_dgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info); | |||
| } | |||
| #endif | |||
| @@ -590,7 +592,8 @@ void LAPACK(cgemmt)( | |||
| const float *B, const blasint *ldB, | |||
| const float *beta, float *C, const blasint *ldC | |||
| ) { | |||
| RELAPACK_cgemmt(uplo, n, A, ldA, info); | |||
| blasint info; | |||
| RELAPACK_cgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info); | |||
| } | |||
| #endif | |||
| @@ -602,6 +605,7 @@ void LAPACK(zgemmt)( | |||
| const double *B, const blasint *ldB, | |||
| const double *beta, double *C, const blasint *ldC | |||
| ) { | |||
| RELAPACK_zgemmt(uplo, n, A, ldA, info); | |||
| blasint info; | |||
| RELAPACK_zgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, info); | |||
| } | |||
| #endif | |||
| @@ -30,6 +30,10 @@ if(WIN32) | |||
| FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1 | |||
| "if (Test-Path $args[2]) { Remove-Item -Force $args[2] } \n" | |||
| "$ErrorActionPreference = \"Stop\"\n" | |||
| "If ((Get-Content $args[1] | & file - | %{$_ -match \"BOM\"}) -contains $true) {\n" | |||
| "echo 'Skipped due to wrong input encoding'\n" | |||
| "exit 0\n" | |||
| "}\n" | |||
| "Get-Content $args[1] | & $args[0]\n" | |||
| "If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n" | |||
| "echo Error\n" | |||