| @@ -23,9 +23,9 @@ if(MSVC AND NOT DEFINED NOFORTRAN) | |||||
| endif() | endif() | ||||
| ####### | ####### | ||||
| if(MSVC) | |||||
| option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) | |||||
| endif() | |||||
| option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF) | |||||
| option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON) | |||||
| option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF) | option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF) | ||||
| @@ -320,7 +320,9 @@ if (NOT NOFORTRAN) | |||||
| if(NOT NO_CBLAS) | if(NOT NO_CBLAS) | ||||
| add_subdirectory(ctest) | add_subdirectory(ctest) | ||||
| endif() | endif() | ||||
| add_subdirectory(lapack-netlib/TESTING) | |||||
| if (BUILD_TESTING) | |||||
| add_subdirectory(lapack-netlib/TESTING) | |||||
| endif() | |||||
| if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV) | if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV) | ||||
| add_subdirectory(cpp_thread_test) | add_subdirectory(cpp_thread_test) | ||||
| endif() | endif() | ||||
| @@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(CORE), FT2000) | |||||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||||
| ifneq ($(F_COMPILER), NAG) | |||||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||||
| endif | |||||
| endif | |||||
| # Use a72 tunings because Neoverse-N1 is only available | # Use a72 tunings because Neoverse-N1 is only available | ||||
| # in GCC>=9 | # in GCC>=9 | ||||
| ifeq ($(CORE), NEOVERSEN1) | ifeq ($(CORE), NEOVERSEN1) | ||||
| @@ -229,6 +236,43 @@ endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||||
| ifeq ($(CORE), CORTEXX1) | |||||
| CCOMMON_OPT += -march=armv8.2-a -mtune=cortexa72 | |||||
| ifneq ($(F_COMPILER), NAG) | |||||
| FCOMMON_OPT += -march=armv8.2-a -mtune=cortexa72 | |||||
| endif | |||||
| endif | |||||
| endif | |||||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||||
| ifeq ($(CORE), CORTEXX2) | |||||
| CCOMMON_OPT += -march=armv8.4-a+sve | |||||
| ifneq ($(F_COMPILER), NAG) | |||||
| FCOMMON_OPT += -march=armv8.4-a+sve | |||||
| endif | |||||
| endif | |||||
| endif | |||||
| #ifeq (1, $(filter 1,$(ISCLANG))) | |||||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||||
| ifeq ($(CORE), CORTEXA510) | |||||
| CCOMMON_OPT += -march=armv8.4-a+sve | |||||
| ifneq ($(F_COMPILER), NAG) | |||||
| FCOMMON_OPT += -march=armv8.4-a+sve | |||||
| endif | |||||
| endif | |||||
| endif | |||||
| ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||||
| ifeq ($(CORE), CORTEXA710) | |||||
| CCOMMON_OPT += -march=armv8.4-a+sve | |||||
| ifneq ($(F_COMPILER), NAG) | |||||
| FCOMMON_OPT += -march=armv8.4-a+sve | |||||
| endif | |||||
| endif | |||||
| endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -71,7 +71,8 @@ endif | |||||
| getarch : getarch.c cpuid.S dummy $(CPUIDEMU) | getarch : getarch.c cpuid.S dummy $(CPUIDEMU) | ||||
| $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU) | |||||
| avx512=$$(perl c_check - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \ | |||||
| $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU) | |||||
| getarch_2nd : getarch_2nd.c config.h dummy | getarch_2nd : getarch_2nd.c config.h dummy | ||||
| ifndef TARGET_CORE | ifndef TARGET_CORE | ||||
| @@ -92,6 +92,10 @@ CORTEXA53 | |||||
| CORTEXA57 | CORTEXA57 | ||||
| CORTEXA72 | CORTEXA72 | ||||
| CORTEXA73 | CORTEXA73 | ||||
| CORTEXA510 | |||||
| CORTEXA710 | |||||
| CORTEXX1 | |||||
| CORTEXX2 | |||||
| NEOVERSEN1 | NEOVERSEN1 | ||||
| NEOVERSEV1 | NEOVERSEV1 | ||||
| NEOVERSEN2 | NEOVERSEN2 | ||||
| @@ -103,6 +107,9 @@ THUNDERX2T99 | |||||
| TSV110 | TSV110 | ||||
| THUNDERX3T110 | THUNDERX3T110 | ||||
| VORTEX | VORTEX | ||||
| A64FX | |||||
| ARMV8SVE | |||||
| FT2000 | |||||
| 9.System Z: | 9.System Z: | ||||
| ZARCH_GENERIC | ZARCH_GENERIC | ||||
| @@ -65,7 +65,7 @@ jobs: | |||||
| - task: CMake@1 | - task: CMake@1 | ||||
| inputs: | inputs: | ||||
| workingDirectory: 'build' # Optional | workingDirectory: 'build' # Optional | ||||
| cmakeArgs: '-G "Visual Studio 16 2019" ..' | |||||
| cmakeArgs: '-G "Visual Studio 17 2022" ..' | |||||
| - task: CMake@1 | - task: CMake@1 | ||||
| inputs: | inputs: | ||||
| cmakeArgs: '--build . --config Release' | cmakeArgs: '--build . --config Release' | ||||
| @@ -103,7 +103,7 @@ jobs: | |||||
| - job: Windows_flang_clang | - job: Windows_flang_clang | ||||
| pool: | pool: | ||||
| vmImage: 'windows-latest' | |||||
| vmImage: 'windows-2022' | |||||
| steps: | steps: | ||||
| - script: | | - script: | | ||||
| set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%" | set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%" | ||||
| @@ -114,8 +114,8 @@ jobs: | |||||
| conda install --yes --quiet ninja flang | conda install --yes --quiet ninja flang | ||||
| mkdir build | mkdir build | ||||
| cd build | cd build | ||||
| call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" | |||||
| cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON .. | |||||
| call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" | |||||
| cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON .. | |||||
| cmake --build . --config Release | cmake --build . --config Release | ||||
| ctest | ctest | ||||
| @@ -178,7 +178,7 @@ jobs: | |||||
| cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON .. | cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON .. | ||||
| cmake --build . | cmake --build . | ||||
| ctest | ctest | ||||
| - job: OSX_Ifort_Clang | - job: OSX_Ifort_Clang | ||||
| pool: | pool: | ||||
| vmImage: 'macOS-10.15' | vmImage: 'macOS-10.15' | ||||
| @@ -254,7 +254,7 @@ if (($architecture eq "x86") || ($architecture eq "x86_64")) { | |||||
| # $tmpf = new File::Temp( UNLINK => 1 ); | # $tmpf = new File::Temp( UNLINK => 1 ); | ||||
| ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | ||||
| $code = '"vbroadcastss -4 * 4(%rsi), %zmm2"'; | $code = '"vbroadcastss -4 * 4(%rsi), %zmm2"'; | ||||
| print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n"; | |||||
| print $fh "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n"; | |||||
| $args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf"; | $args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf"; | ||||
| if ($compiler eq "PGI") { | if ($compiler eq "PGI") { | ||||
| $args = " -tp skylake -c -o $tmpf.o $tmpf"; | $args = " -tp skylake -c -o $tmpf.o $tmpf"; | ||||
| @@ -278,7 +278,7 @@ if ($data =~ /HAVE_C11/) { | |||||
| $c11_atomics = 0; | $c11_atomics = 0; | ||||
| } else { | } else { | ||||
| ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | ||||
| print $tmpf "#include <stdatomic.h>\nint main(void){}\n"; | |||||
| print $fh "#include <stdatomic.h>\nint main(void){}\n"; | |||||
| $args = " -c -o $tmpf.o $tmpf"; | $args = " -c -o $tmpf.o $tmpf"; | ||||
| my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); | my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); | ||||
| system(@cmd) == 0; | system(@cmd) == 0; | ||||
| @@ -316,6 +316,7 @@ if ($architecture ne $hostarch) { | |||||
| } | } | ||||
| $cross = 1 if ($os ne $hostos); | $cross = 1 if ($os ne $hostos); | ||||
| $cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && ($ENV{TERMUX_APP_PID} != "")); | |||||
| $openmp = "" if $ENV{USE_OPENMP} != 1; | $openmp = "" if $ENV{USE_OPENMP} != 1; | ||||
| @@ -161,6 +161,30 @@ if (${CORE} STREQUAL ARMV8SVE) | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| if (${CORE} STREQUAL CORTEXA510) | |||||
| if (NOT DYNAMIC_ARCH) | |||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") | |||||
| endif () | |||||
| endif () | |||||
| if (${CORE} STREQUAL CORTEXA710) | |||||
| if (NOT DYNAMIC_ARCH) | |||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") | |||||
| endif () | |||||
| endif () | |||||
| if (${CORE} STREQUAL CORTEXX1) | |||||
| if (NOT DYNAMIC_ARCH) | |||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a") | |||||
| endif () | |||||
| endif () | |||||
| if (${CORE} STREQUAL CORTEXX2) | |||||
| if (NOT DYNAMIC_ARCH) | |||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve") | |||||
| endif () | |||||
| endif () | |||||
| if (${CORE} STREQUAL POWER10) | if (${CORE} STREQUAL POWER10) | ||||
| if (NOT DYNAMIC_ARCH) | if (NOT DYNAMIC_ARCH) | ||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | ||||
| @@ -67,7 +67,15 @@ if (${F_COMPILER} STREQUAL "GFORTRAN") | |||||
| if (BINARY64) | if (BINARY64) | ||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | ||||
| if (INTERFACE64) | if (INTERFACE64) | ||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") | |||||
| if (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") | |||||
| if (WIN32) | |||||
| set(FCOMMON_OPT "${FCOMMON_OPT} /integer-size:64") | |||||
| else () | |||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -integer-size 64") | |||||
| endif () | |||||
| else () | |||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") | |||||
| endif () | |||||
| endif () | endif () | ||||
| else () | else () | ||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -m32") | set(FCOMMON_OPT "${FCOMMON_OPT} -m32") | ||||
| @@ -2610,8 +2610,9 @@ | |||||
| #endif | #endif | ||||
| #ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
| #if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)\ | |||||
| || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) | |||||
| #if !defined(DYNAMIC_ARCH) \ | |||||
| && (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \ | |||||
| || defined(ARCH_LOONGARCH64) || defined(ARCH_E2K)) | |||||
| extern BLASLONG gemm_offset_a; | extern BLASLONG gemm_offset_a; | ||||
| extern BLASLONG gemm_offset_b; | extern BLASLONG gemm_offset_b; | ||||
| extern BLASLONG sbgemm_p; | extern BLASLONG sbgemm_p; | ||||
| @@ -45,6 +45,10 @@ size_t length64=sizeof(value64); | |||||
| #define CPU_NEOVERSEN1 11 | #define CPU_NEOVERSEN1 11 | ||||
| #define CPU_NEOVERSEV1 16 | #define CPU_NEOVERSEV1 16 | ||||
| #define CPU_NEOVERSEN2 17 | #define CPU_NEOVERSEN2 17 | ||||
| #define CPU_CORTEXX1 18 | |||||
| #define CPU_CORTEXX2 19 | |||||
| #define CPU_CORTEXA510 20 | |||||
| #define CPU_CORTEXA710 21 | |||||
| // Qualcomm | // Qualcomm | ||||
| #define CPU_FALKOR 6 | #define CPU_FALKOR 6 | ||||
| // Cavium | // Cavium | ||||
| @@ -59,6 +63,8 @@ size_t length64=sizeof(value64); | |||||
| #define CPU_VORTEX 13 | #define CPU_VORTEX 13 | ||||
| // Fujitsu | // Fujitsu | ||||
| #define CPU_A64FX 15 | #define CPU_A64FX 15 | ||||
| // Phytium | |||||
| #define CPU_FT2000 22 | |||||
| static char *cpuname[] = { | static char *cpuname[] = { | ||||
| "UNKNOWN", | "UNKNOWN", | ||||
| @@ -73,12 +79,17 @@ static char *cpuname[] = { | |||||
| "TSV110", | "TSV110", | ||||
| "EMAG8180", | "EMAG8180", | ||||
| "NEOVERSEN1", | "NEOVERSEN1", | ||||
| "NEOVERSEV1" | |||||
| "NEOVERSEN2" | |||||
| "THUNDERX3T110", | "THUNDERX3T110", | ||||
| "VORTEX", | "VORTEX", | ||||
| "CORTEXA55", | "CORTEXA55", | ||||
| "A64FX" | |||||
| "A64FX", | |||||
| "NEOVERSEV1", | |||||
| "NEOVERSEN2", | |||||
| "CORTEXX1", | |||||
| "CORTEXX2", | |||||
| "CORTEXA510", | |||||
| "CORTEXA710", | |||||
| "FT2000" | |||||
| }; | }; | ||||
| static char *cpuname_lower[] = { | static char *cpuname_lower[] = { | ||||
| @@ -94,12 +105,17 @@ static char *cpuname_lower[] = { | |||||
| "tsv110", | "tsv110", | ||||
| "emag8180", | "emag8180", | ||||
| "neoversen1", | "neoversen1", | ||||
| "neoversev1", | |||||
| "neoversen2", | |||||
| "thunderx3t110", | "thunderx3t110", | ||||
| "vortex", | "vortex", | ||||
| "cortexa55", | "cortexa55", | ||||
| "a64fx" | |||||
| "a64fx", | |||||
| "neoversev1", | |||||
| "neoversen2", | |||||
| "cortexx1", | |||||
| "cortexx2", | |||||
| "cortexa510", | |||||
| "cortexa710", | |||||
| "ft2000" | |||||
| }; | }; | ||||
| int get_feature(char *search) | int get_feature(char *search) | ||||
| @@ -182,6 +198,14 @@ int detect(void) | |||||
| return CPU_NEOVERSEN2; | return CPU_NEOVERSEN2; | ||||
| else if (strstr(cpu_part, "0xd05")) | else if (strstr(cpu_part, "0xd05")) | ||||
| return CPU_CORTEXA55; | return CPU_CORTEXA55; | ||||
| else if (strstr(cpu_part, "0xd46")) | |||||
| return CPU_CORTEXA510; | |||||
| else if (strstr(cpu_part, "0xd47")) | |||||
| return CPU_CORTEXA710; | |||||
| else if (strstr(cpu_part, "0xd44")) | |||||
| return CPU_CORTEXX1; | |||||
| else if (strstr(cpu_part, "0xd4c")) | |||||
| return CPU_CORTEXX2; | |||||
| } | } | ||||
| // Qualcomm | // Qualcomm | ||||
| else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | ||||
| @@ -202,6 +226,13 @@ int detect(void) | |||||
| // Fujitsu | // Fujitsu | ||||
| else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) | else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) | ||||
| return CPU_A64FX; | return CPU_A64FX; | ||||
| // Apple | |||||
| else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022")) | |||||
| return CPU_VORTEX; | |||||
| // Phytium | |||||
| else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661") | |||||
| || strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663"))) | |||||
| return CPU_FT2000; | |||||
| } | } | ||||
| p = (char *) NULL ; | p = (char *) NULL ; | ||||
| @@ -382,7 +413,24 @@ void get_cpuconfig(void) | |||||
| printf("#define DTB_DEFAULT_ENTRIES 48\n"); | printf("#define DTB_DEFAULT_ENTRIES 48\n"); | ||||
| printf("#define DTB_SIZE 4096\n"); | printf("#define DTB_SIZE 4096\n"); | ||||
| break; | break; | ||||
| case CPU_CORTEXA510: | |||||
| case CPU_CORTEXA710: | |||||
| case CPU_CORTEXX1: | |||||
| case CPU_CORTEXX2: | |||||
| printf("#define ARMV9\n"); | |||||
| printf("#define %s\n", cpuname[d]); | |||||
| printf("#define L1_CODE_SIZE 65536\n"); | |||||
| printf("#define L1_CODE_LINESIZE 64\n"); | |||||
| printf("#define L1_CODE_ASSOCIATIVE 4\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L1_DATA_ASSOCIATIVE 4\n"); | |||||
| printf("#define L2_SIZE 1048576\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define L2_ASSOCIATIVE 8\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| break; | |||||
| case CPU_FALKOR: | case CPU_FALKOR: | ||||
| printf("#define FALKOR\n"); | printf("#define FALKOR\n"); | ||||
| printf("#define L1_CODE_SIZE 65536\n"); | printf("#define L1_CODE_SIZE 65536\n"); | ||||
| @@ -469,9 +517,9 @@ void get_cpuconfig(void) | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | ||||
| printf("#define DTB_SIZE 4096 \n"); | printf("#define DTB_SIZE 4096 \n"); | ||||
| break; | break; | ||||
| #ifdef __APPLE__ | |||||
| case CPU_VORTEX: | case CPU_VORTEX: | ||||
| printf("#define VORTEX \n"); | printf("#define VORTEX \n"); | ||||
| #ifdef __APPLE__ | |||||
| sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); | sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); | ||||
| printf("#define L1_CODE_SIZE %lld \n",value64); | printf("#define L1_CODE_SIZE %lld \n",value64); | ||||
| sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); | sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); | ||||
| @@ -480,10 +528,10 @@ void get_cpuconfig(void) | |||||
| printf("#define L1_DATA_SIZE %lld \n",value64); | printf("#define L1_DATA_SIZE %lld \n",value64); | ||||
| sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); | sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); | ||||
| printf("#define L2_SIZE %lld \n",value64); | printf("#define L2_SIZE %lld \n",value64); | ||||
| #endif | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | ||||
| printf("#define DTB_SIZE 4096 \n"); | printf("#define DTB_SIZE 4096 \n"); | ||||
| break; | break; | ||||
| #endif | |||||
| case CPU_A64FX: | case CPU_A64FX: | ||||
| printf("#define A64FX\n"); | printf("#define A64FX\n"); | ||||
| printf("#define L1_CODE_SIZE 65535\n"); | printf("#define L1_CODE_SIZE 65535\n"); | ||||
| @@ -494,6 +542,16 @@ void get_cpuconfig(void) | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | printf("#define DTB_DEFAULT_ENTRIES 64\n"); | ||||
| printf("#define DTB_SIZE 4096\n"); | printf("#define DTB_SIZE 4096\n"); | ||||
| break; | break; | ||||
| case CPU_FT2000: | |||||
| printf("#define FT2000\n"); | |||||
| printf("#define L1_CODE_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L2_SIZE 33554432\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| break; | |||||
| } | } | ||||
| get_cpucount(); | get_cpucount(); | ||||
| } | } | ||||
| @@ -1707,8 +1707,18 @@ int get_cpuname(void){ | |||||
| if (model == 0xf && stepping < 0xe) | if (model == 0xf && stepping < 0xe) | ||||
| return CPUTYPE_NANO; | return CPUTYPE_NANO; | ||||
| return CPUTYPE_NEHALEM; | return CPUTYPE_NEHALEM; | ||||
| case 0x7: | |||||
| switch (exmodel) { | |||||
| case 5: | |||||
| if (support_avx2()) | |||||
| return CPUTYPE_ZEN; | |||||
| else | |||||
| return CPUTYPE_DUNNINGTON; | |||||
| default: | |||||
| return CPUTYPE_NEHALEM; | |||||
| } | |||||
| default: | default: | ||||
| if (family >= 0x7) | |||||
| if (family >= 0x8) | |||||
| return CPUTYPE_NEHALEM; | return CPUTYPE_NEHALEM; | ||||
| else | else | ||||
| return CPUTYPE_VIAC3; | return CPUTYPE_VIAC3; | ||||
| @@ -1716,7 +1726,20 @@ int get_cpuname(void){ | |||||
| } | } | ||||
| if (vendor == VENDOR_ZHAOXIN){ | if (vendor == VENDOR_ZHAOXIN){ | ||||
| return CPUTYPE_NEHALEM; | |||||
| switch (family) { | |||||
| case 0x7: | |||||
| switch (exmodel) { | |||||
| case 5: | |||||
| if (support_avx2()) | |||||
| return CPUTYPE_ZEN; | |||||
| else | |||||
| return CPUTYPE_DUNNINGTON; | |||||
| default: | |||||
| return CPUTYPE_NEHALEM; | |||||
| } | |||||
| default: | |||||
| return CPUTYPE_NEHALEM; | |||||
| } | |||||
| } | } | ||||
| if (vendor == VENDOR_RISE){ | if (vendor == VENDOR_RISE){ | ||||
| @@ -2416,8 +2439,18 @@ int get_coretype(void){ | |||||
| if (model == 0xf && stepping < 0xe) | if (model == 0xf && stepping < 0xe) | ||||
| return CORE_NANO; | return CORE_NANO; | ||||
| return CORE_NEHALEM; | return CORE_NEHALEM; | ||||
| case 0x7: | |||||
| switch (exmodel) { | |||||
| case 5: | |||||
| if (support_avx2()) | |||||
| return CORE_ZEN; | |||||
| else | |||||
| return CORE_DUNNINGTON; | |||||
| default: | |||||
| return CORE_NEHALEM; | |||||
| } | |||||
| default: | default: | ||||
| if (family >= 0x7) | |||||
| if (family >= 0x8) | |||||
| return CORE_NEHALEM; | return CORE_NEHALEM; | ||||
| else | else | ||||
| return CORE_VIAC3; | return CORE_VIAC3; | ||||
| @@ -2425,7 +2458,20 @@ int get_coretype(void){ | |||||
| } | } | ||||
| if (vendor == VENDOR_ZHAOXIN) { | if (vendor == VENDOR_ZHAOXIN) { | ||||
| return CORE_NEHALEM; | |||||
| switch (family) { | |||||
| case 0x7: | |||||
| switch (exmodel) { | |||||
| case 5: | |||||
| if (support_avx2()) | |||||
| return CORE_ZEN; | |||||
| else | |||||
| return CORE_DUNNINGTON; | |||||
| default: | |||||
| return CORE_NEHALEM; | |||||
| } | |||||
| default: | |||||
| return CORE_NEHALEM; | |||||
| } | |||||
| } | } | ||||
| return CORE_UNKNOWN; | return CORE_UNKNOWN; | ||||
| @@ -96,7 +96,7 @@ extern gotoblas_t gotoblas_BARCELONA; | |||||
| #endif | #endif | ||||
| #ifdef DYN_ATOM | #ifdef DYN_ATOM | ||||
| extern gotoblas_t gotoblas_ATOM; | extern gotoblas_t gotoblas_ATOM; | ||||
| elif defined(DYN_NEHALEM) | |||||
| #elif defined(DYN_NEHALEM) | |||||
| #define gotoblas_ATOM gotoblas_NEHALEM | #define gotoblas_ATOM gotoblas_NEHALEM | ||||
| #else | #else | ||||
| #define gotoblas_ATOM gotoblas_PRESCOTT | #define gotoblas_ATOM gotoblas_PRESCOTT | ||||
| @@ -875,14 +875,37 @@ static gotoblas_t *get_coretype(void){ | |||||
| if (model == 0xf && stepping < 0xe) | if (model == 0xf && stepping < 0xe) | ||||
| return &gotoblas_NANO; | return &gotoblas_NANO; | ||||
| return &gotoblas_NEHALEM; | return &gotoblas_NEHALEM; | ||||
| case 0x7: | |||||
| switch (exmodel) { | |||||
| case 5: | |||||
| if (support_avx2()) | |||||
| return &gotoblas_ZEN; | |||||
| else | |||||
| return &gotoblas_DUNNINGTON; | |||||
| default: | |||||
| return &gotoblas_NEHALEM; | |||||
| } | |||||
| default: | default: | ||||
| if (family >= 0x7) | |||||
| if (family >= 0x8) | |||||
| return &gotoblas_NEHALEM; | return &gotoblas_NEHALEM; | ||||
| } | } | ||||
| } | } | ||||
| if (vendor == VENDOR_ZHAOXIN) { | if (vendor == VENDOR_ZHAOXIN) { | ||||
| return &gotoblas_NEHALEM; | |||||
| switch (family) { | |||||
| case 0x7: | |||||
| switch (exmodel) { | |||||
| case 5: | |||||
| if (support_avx2()) | |||||
| return &gotoblas_ZEN; | |||||
| else | |||||
| return &gotoblas_DUNNINGTON; | |||||
| default: | |||||
| return &gotoblas_NEHALEM; | |||||
| } | |||||
| default: | |||||
| return &gotoblas_NEHALEM; | |||||
| } | |||||
| } | } | ||||
| return NULL; | return NULL; | ||||
| @@ -60,6 +60,9 @@ static char* openblas_config_str="" | |||||
| #ifdef USE_OPENMP | #ifdef USE_OPENMP | ||||
| "USE_OPENMP " | "USE_OPENMP " | ||||
| #endif | #endif | ||||
| #ifdef USE_TLS | |||||
| "USE_TLS " | |||||
| #endif | |||||
| #ifndef DYNAMIC_ARCH | #ifndef DYNAMIC_ARCH | ||||
| CHAR_CORENAME | CHAR_CORENAME | ||||
| #endif | #endif | ||||
| @@ -94,14 +94,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include <sys/sysinfo.h> | #include <sys/sysinfo.h> | ||||
| #endif | #endif | ||||
| #if defined(__x86_64__) || defined(_M_X64) | |||||
| #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX2__)) || (defined(__clang__) && __clang_major__ >= 6)) | |||||
| #else | |||||
| #ifndef NO_AVX512 | |||||
| #define NO_AVX512 | |||||
| #endif | |||||
| #endif | |||||
| #endif | |||||
| /* #define FORCE_P2 */ | /* #define FORCE_P2 */ | ||||
| /* #define FORCE_KATMAI */ | /* #define FORCE_KATMAI */ | ||||
| /* #define FORCE_COPPERMINE */ | /* #define FORCE_COPPERMINE */ | ||||
| @@ -1240,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "cortexa53" | #define LIBNAME "cortexa53" | ||||
| #define CORENAME "CORTEXA53" | #define CORENAME "CORTEXA53" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA57 | #ifdef FORCE_CORTEXA57 | ||||
| @@ -1256,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "cortexa57" | #define LIBNAME "cortexa57" | ||||
| #define CORENAME "CORTEXA57" | #define CORENAME "CORTEXA57" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA72 | #ifdef FORCE_CORTEXA72 | ||||
| @@ -1272,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "cortexa72" | #define LIBNAME "cortexa72" | ||||
| #define CORENAME "CORTEXA72" | #define CORENAME "CORTEXA72" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA73 | #ifdef FORCE_CORTEXA73 | ||||
| @@ -1288,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "cortexa73" | #define LIBNAME "cortexa73" | ||||
| #define CORENAME "CORTEXA73" | #define CORENAME "CORTEXA73" | ||||
| #else | |||||
| #endif | |||||
| #ifdef FORCE_CORTEXX1 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "CORTEXX1" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXX1 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "cortexx1" | |||||
| #define CORENAME "CORTEXX1" | |||||
| #endif | |||||
| #ifdef FORCE_CORTEXX2 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "CORTEXX2" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXX2 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||||
| #define LIBNAME "cortexx2" | |||||
| #define CORENAME "CORTEXX2" | |||||
| #endif | |||||
| #ifdef FORCE_CORTEXA510 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "CORTEXA510" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXA510 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||||
| #define LIBNAME "cortexa510" | |||||
| #define CORENAME "CORTEXA510" | |||||
| #endif | |||||
| #ifdef FORCE_CORTEXA710 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "CORTEXA710" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXA710 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||||
| #define LIBNAME "cortexa710" | |||||
| #define CORENAME "CORTEXA710" | |||||
| #endif | #endif | ||||
| #ifdef FORCE_NEOVERSEN1 | #ifdef FORCE_NEOVERSEN1 | ||||
| @@ -1305,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-march=armv8.2-a -mtune=neoverse-n1" | "-march=armv8.2-a -mtune=neoverse-n1" | ||||
| #define LIBNAME "neoversen1" | #define LIBNAME "neoversen1" | ||||
| #define CORENAME "NEOVERSEN1" | #define CORENAME "NEOVERSEN1" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_NEOVERSEV1 | #ifdef FORCE_NEOVERSEV1 | ||||
| @@ -1322,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-march=armv8.4-a -mtune=neoverse-v1" | "-march=armv8.4-a -mtune=neoverse-v1" | ||||
| #define LIBNAME "neoversev1" | #define LIBNAME "neoversev1" | ||||
| #define CORENAME "NEOVERSEV1" | #define CORENAME "NEOVERSEV1" | ||||
| #else | |||||
| #endif | #endif | ||||
| @@ -1340,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-march=armv8.5-a -mtune=neoverse-n2" | "-march=armv8.5-a -mtune=neoverse-n2" | ||||
| #define LIBNAME "neoversen2" | #define LIBNAME "neoversen2" | ||||
| #define CORENAME "NEOVERSEN2" | #define CORENAME "NEOVERSEN2" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA55 | #ifdef FORCE_CORTEXA55 | ||||
| @@ -1356,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "cortexa55" | #define LIBNAME "cortexa55" | ||||
| #define CORENAME "CORTEXA55" | #define CORENAME "CORTEXA55" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_FALKOR | #ifdef FORCE_FALKOR | ||||
| @@ -1372,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "falkor" | #define LIBNAME "falkor" | ||||
| #define CORENAME "FALKOR" | #define CORENAME "FALKOR" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_THUNDERX | #ifdef FORCE_THUNDERX | ||||
| @@ -1387,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "thunderx" | #define LIBNAME "thunderx" | ||||
| #define CORENAME "THUNDERX" | #define CORENAME "THUNDERX" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_THUNDERX2T99 | #ifdef FORCE_THUNDERX2T99 | ||||
| @@ -1405,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "thunderx2t99" | #define LIBNAME "thunderx2t99" | ||||
| #define CORENAME "THUNDERX2T99" | #define CORENAME "THUNDERX2T99" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_TSV110 | #ifdef FORCE_TSV110 | ||||
| @@ -1421,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "tsv110" | #define LIBNAME "tsv110" | ||||
| #define CORENAME "TSV110" | #define CORENAME "TSV110" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_EMAG8180 | #ifdef FORCE_EMAG8180 | ||||
| @@ -1456,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | ||||
| #define LIBNAME "thunderx3t110" | #define LIBNAME "thunderx3t110" | ||||
| #define CORENAME "THUNDERX3T110" | #define CORENAME "THUNDERX3T110" | ||||
| #else | |||||
| #endif | #endif | ||||
| #ifdef FORCE_VORTEX | #ifdef FORCE_VORTEX | ||||
| @@ -1488,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8" | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8" | ||||
| #define LIBNAME "a64fx" | #define LIBNAME "a64fx" | ||||
| #define CORENAME "A64FX" | #define CORENAME "A64FX" | ||||
| #else | |||||
| #endif | |||||
| /* Forced-target description selected when FORCE_FT2000 is defined. */ | |||||
| #ifdef FORCE_FT2000 | |||||
| #define ARMV8 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "FT2000" | |||||
| #define SUBDIRNAME "arm64" | |||||
| /* ARCHCONFIG is one string of space-separated -D flags; each fragment must */ | |||||
| /* end with a blank so adjacent flags do not fuse after concatenation. */ | |||||
| /* NOTE(review): L2_SIZE 33554426 is not a power of two; 33554432 (32 MiB) */ | |||||
| /* may have been intended - confirm against the CPU data sheet. */ | |||||
| #define ARCHCONFIG "-DFT2000 " \ | |||||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ | |||||
| "-DL2_SIZE=33554426 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "ft2000" | |||||
| #define CORENAME "FT2000" | |||||
| #endif | #endif | ||||
| #ifdef FORCE_ZARCH_GENERIC | #ifdef FORCE_ZARCH_GENERIC | ||||
| @@ -678,7 +678,7 @@ endif () | |||||
| set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) | set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) | ||||
| endif () | endif () | ||||
| if (NOT DEFINED SBGEMM_SMALL_K_B0_TT) | if (NOT DEFINED SBGEMM_SMALL_K_B0_TT) | ||||
| set($SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) | |||||
| set(SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) | |||||
| endif () | endif () | ||||
| GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16") | GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16") | ||||
| GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16") | GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16") | ||||
| @@ -0,0 +1,216 @@ | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAXPYKERNEL = axpy.S | |||||
| DAXPYKERNEL = axpy.S | |||||
| CAXPYKERNEL = zaxpy.S | |||||
| ZAXPYKERNEL = zaxpy.S | |||||
| SROTKERNEL = rot.S | |||||
| DROTKERNEL = rot.S | |||||
| CROTKERNEL = zrot.S | |||||
| ZROTKERNEL = zrot.S | |||||
| SSCALKERNEL = scal.S | |||||
| DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | |||||
| ZSCALKERNEL = zscal.S | |||||
| SGEMVNKERNEL = gemv_n.S | |||||
| DGEMVNKERNEL = gemv_n.S | |||||
| CGEMVNKERNEL = zgemv_n.S | |||||
| ZGEMVNKERNEL = zgemv_n.S | |||||
| SGEMVTKERNEL = gemv_t.S | |||||
| DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | |||||
| ZGEMVTKERNEL = zgemv_t.S | |||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| DDOTKERNEL = dot.S | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| SDOTKERNEL = ../generic/dot.c | |||||
| else | |||||
| SDOTKERNEL = dot.S | |||||
| endif | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| else | |||||
| CDOTKERNEL = ../arm/zdot.c | |||||
| ZDOTKERNEL = ../arm/zdot.c | |||||
| endif | |||||
| DSDOTKERNEL = dot.S | |||||
| DGEMM_BETA = dgemm_beta.S | |||||
| SGEMM_BETA = sgemm_beta.S | |||||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| @@ -0,0 +1,216 @@ | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAXPYKERNEL = axpy.S | |||||
| DAXPYKERNEL = axpy.S | |||||
| CAXPYKERNEL = zaxpy.S | |||||
| ZAXPYKERNEL = zaxpy.S | |||||
| SROTKERNEL = rot.S | |||||
| DROTKERNEL = rot.S | |||||
| CROTKERNEL = zrot.S | |||||
| ZROTKERNEL = zrot.S | |||||
| SSCALKERNEL = scal.S | |||||
| DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | |||||
| ZSCALKERNEL = zscal.S | |||||
| SGEMVNKERNEL = gemv_n.S | |||||
| DGEMVNKERNEL = gemv_n.S | |||||
| CGEMVNKERNEL = zgemv_n.S | |||||
| ZGEMVNKERNEL = zgemv_n.S | |||||
| SGEMVTKERNEL = gemv_t.S | |||||
| DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | |||||
| ZGEMVTKERNEL = zgemv_t.S | |||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| DDOTKERNEL = dot.S | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| SDOTKERNEL = ../generic/dot.c | |||||
| else | |||||
| SDOTKERNEL = dot.S | |||||
| endif | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| else | |||||
| CDOTKERNEL = ../arm/zdot.c | |||||
| ZDOTKERNEL = ../arm/zdot.c | |||||
| endif | |||||
| DSDOTKERNEL = dot.S | |||||
| DGEMM_BETA = dgemm_beta.S | |||||
| SGEMM_BETA = sgemm_beta.S | |||||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| @@ -0,0 +1 @@ | |||||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||||
| @@ -0,0 +1,216 @@ | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| TRSMCOPYLN_M = trsm_lncopy_sve.c | |||||
| TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||||
| TRSMCOPYUN_M = trsm_uncopy_sve.c | |||||
| TRSMCOPYUT_M = trsm_utcopy_sve.c | |||||
| CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||||
| ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||||
| ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||||
| ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||||
| ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||||
| ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||||
| ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||||
| ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAXPYKERNEL = axpy.S | |||||
| DAXPYKERNEL = axpy.S | |||||
| CAXPYKERNEL = zaxpy.S | |||||
| ZAXPYKERNEL = zaxpy.S | |||||
| SROTKERNEL = rot.S | |||||
| DROTKERNEL = rot.S | |||||
| CROTKERNEL = zrot.S | |||||
| ZROTKERNEL = zrot.S | |||||
| SSCALKERNEL = scal.S | |||||
| DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | |||||
| ZSCALKERNEL = zscal.S | |||||
| SGEMVNKERNEL = gemv_n.S | |||||
| DGEMVNKERNEL = gemv_n.S | |||||
| CGEMVNKERNEL = zgemv_n.S | |||||
| ZGEMVNKERNEL = zgemv_n.S | |||||
| SGEMVTKERNEL = gemv_t.S | |||||
| DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | |||||
| ZGEMVTKERNEL = zgemv_t.S | |||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| DDOTKERNEL = dot.S | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| SDOTKERNEL = ../generic/dot.c | |||||
| else | |||||
| SDOTKERNEL = dot.S | |||||
| endif | |||||
| ifneq ($(C_COMPILER), PGI) | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| else | |||||
| CDOTKERNEL = ../arm/zdot.c | |||||
| ZDOTKERNEL = ../arm/zdot.c | |||||
| endif | |||||
| DSDOTKERNEL = dot.S | |||||
| DGEMM_BETA = dgemm_beta.S | |||||
| SGEMM_BETA = sgemm_beta.S | |||||
| SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||||
| STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||||
| SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||||
| SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||||
| SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| SSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| SSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||||
| DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||||
| DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||||
| DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||||
| DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||||
| DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||||
| DSYMMUCOPY_M = symm_ucopy_sve.c | |||||
| DSYMMLCOPY_M = symm_lcopy_sve.c | |||||
| CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||||
| CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||||
| ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||||
| ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||||
| ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||||
| ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||||
| ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||||
| ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||||
| ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||||
| ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||||
| ZSYMMLCOPY_M = zsymm_lcopy_sve.c | |||||
| @@ -0,0 +1,3 @@ | |||||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||||
| @@ -1239,7 +1239,6 @@ static void init_parameter(void) { | |||||
| #ifdef BUILD_BFLOAT16 | #ifdef BUILD_BFLOAT16 | ||||
| TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | ||||
| TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; | |||||
| TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | ||||
| #endif | #endif | ||||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | ||||
| @@ -1824,6 +1823,13 @@ static void init_parameter(void) { | |||||
| fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p); | fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p); | ||||
| #endif | #endif | ||||
| #if BUILD_BFLOAT16==1 | |||||
| TABLE_NAME.sbgemm_r = (((BUFFER_SIZE - | |||||
| ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA | |||||
| + TABLE_NAME.align) & ~TABLE_NAME.align) | |||||
| ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15); | |||||
| #endif | |||||
| #if BUILD_SINGLE==1 | #if BUILD_SINGLE==1 | ||||
| TABLE_NAME.sgemm_r = (((BUFFER_SIZE - | TABLE_NAME.sgemm_r = (((BUFFER_SIZE - | ||||
| ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA | ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA | ||||
| @@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | ||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) | |||||
| #include <immintrin.h> | #include <immintrin.h> | ||||
| #include "common.h" | #include "common.h" | ||||
| @@ -47,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| _mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N) | _mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N) | ||||
| #define MASK_STORE_512(M, N) \ | #define MASK_STORE_512(M, N) \ | ||||
| result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | ||||
| asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \ | |||||
| asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \ | |||||
| _mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N) | _mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N) | ||||
| #endif | #endif | ||||
| @@ -265,7 +266,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||||
| int mm = M - i; | int mm = M - i; | ||||
| if (!mm) return 0; | if (!mm) return 0; | ||||
| if (mm > 4 || K < 16) { | if (mm > 4 || K < 16) { | ||||
| register __mmask8 mask asm("k1") = (1UL << mm) - 1; | |||||
| register __mmask8 mask = (1UL << mm) - 1; | |||||
| for (j = 0; j < n6; j += 6) { | for (j = 0; j < n6; j += 6) { | ||||
| DECLARE_RESULT_512(0, 0); | DECLARE_RESULT_512(0, 0); | ||||
| DECLARE_RESULT_512(0, 1); | DECLARE_RESULT_512(0, 1); | ||||
| @@ -588,3 +589,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| #else | |||||
| #include "../generic/gemm_small_matrix_kernel_nn.c" | |||||
| #endif | |||||
| @@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| _mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N) | _mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N) | ||||
| #define MASK_STORE_512(M, N) \ | #define MASK_STORE_512(M, N) \ | ||||
| result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | ||||
| asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \ | |||||
| asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \ | |||||
| _mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N) | _mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N) | ||||
| #define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | #define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | ||||
| __m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \ | __m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \ | ||||
| @@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||||
| } | } | ||||
| int mm = M - i; | int mm = M - i; | ||||
| if (mm >= 6) { | if (mm >= 6) { | ||||
| register __mmask16 mask asm("k1") = (1UL << mm) - 1; | |||||
| register __mmask16 mask = (1UL << mm) - 1; | |||||
| for (j = 0; j < n8; j += 8) { | for (j = 0; j < n8; j += 8) { | ||||
| DECLARE_RESULT_512(0, 0); | DECLARE_RESULT_512(0, 0); | ||||
| DECLARE_RESULT_512(0, 1); | DECLARE_RESULT_512(0, 1); | ||||
| @@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | ||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) | |||||
| #include <immintrin.h> | #include <immintrin.h> | ||||
| #include "common.h" | #include "common.h" | ||||
| @@ -320,3 +321,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| #else | |||||
| #include "../generic/gemm_small_matrix_kernel_tn.c" | |||||
| #endif | |||||
| @@ -114,10 +114,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc)) | asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc)) | ||||
| #define _MASK_STORE_C_2nx16(addr, val0, val1) \ | #define _MASK_STORE_C_2nx16(addr, val0, val1) \ | ||||
| asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \ | |||||
| asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "k"(mmask)); \ | |||||
| asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask)); \ | |||||
| asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "k"(mmask)) | |||||
| asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \ | |||||
| asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "Yk"(mmask)); \ | |||||
| asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask)); \ | |||||
| asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "Yk"(mmask)) | |||||
| #define _REORDER_C_2X(result_0, result_1) { \ | #define _REORDER_C_2X(result_0, result_1) { \ | ||||
| __m512 tmp0, tmp1; \ | __m512 tmp0, tmp1; \ | ||||
| @@ -154,8 +154,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| asm("vmovups %0, (%1)": : "v"(val0), "r"(addr)); | asm("vmovups %0, (%1)": : "v"(val0), "r"(addr)); | ||||
| #define _MASK_STORE_C_16(addr, val0) \ | #define _MASK_STORE_C_16(addr, val0) \ | ||||
| asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \ | |||||
| asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask)); | |||||
| asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \ | |||||
| asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask)); | |||||
| #define N_STORE_4X(A, Bx, By) { \ | #define N_STORE_4X(A, Bx, By) { \ | ||||
| _REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \ | _REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \ | ||||
| @@ -13,6 +13,8 @@ | |||||
| #define ONE 1.e0f | #define ONE 1.e0f | ||||
| #define ZERO 0.e0f | #define ZERO 0.e0f | ||||
| #define SHUFFLE_MAGIC_NO (const int) 0x39 | |||||
| #undef STORE16_COMPLETE_RESULT | #undef STORE16_COMPLETE_RESULT | ||||
| #undef STORE16_MASK_COMPLETE_RESULT | #undef STORE16_MASK_COMPLETE_RESULT | ||||
| #undef SBGEMM_BLOCK_KERNEL_NN_32x8xK | #undef SBGEMM_BLOCK_KERNEL_NN_32x8xK | ||||
| @@ -356,7 +358,6 @@ void sbgemm_block_kernel_nn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa | |||||
| bfloat16 * B_addr = B; | bfloat16 * B_addr = B; | ||||
| float * C_addr = C; | float * C_addr = C; | ||||
| int SHUFFLE_MAGIC_NO = 0x39; | |||||
| BLASLONG tag_k_32x = k & (~31); | BLASLONG tag_k_32x = k & (~31); | ||||
| #ifndef ONE_ALPHA | #ifndef ONE_ALPHA | ||||
| @@ -465,7 +466,6 @@ void sbgemm_block_kernel_nn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa | |||||
| bfloat16 * B_addr = B; | bfloat16 * B_addr = B; | ||||
| float * C_addr = C; | float * C_addr = C; | ||||
| int SHUFFLE_MAGIC_NO = 0x39; | |||||
| BLASLONG tag_k_32x = k & (~31); | BLASLONG tag_k_32x = k & (~31); | ||||
| #ifndef ONE_ALPHA | #ifndef ONE_ALPHA | ||||
| @@ -1192,7 +1192,6 @@ void sbgemm_block_kernel_tn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa | |||||
| bfloat16 * B_addr = B; | bfloat16 * B_addr = B; | ||||
| float * C_addr = C; | float * C_addr = C; | ||||
| int SHUFFLE_MAGIC_NO = 0x39; | |||||
| BLASLONG tag_k_32x = k & (~31); | BLASLONG tag_k_32x = k & (~31); | ||||
| #ifndef ONE_ALPHA | #ifndef ONE_ALPHA | ||||
| @@ -1291,7 +1290,6 @@ void sbgemm_block_kernel_tn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa | |||||
| bfloat16 * B_addr = B; | bfloat16 * B_addr = B; | ||||
| float * C_addr = C; | float * C_addr = C; | ||||
| int SHUFFLE_MAGIC_NO = 0x39; | |||||
| BLASLONG tag_k_32x = k & (~31); | BLASLONG tag_k_32x = k & (~31); | ||||
| #ifndef ONE_ALPHA | #ifndef ONE_ALPHA | ||||
| @@ -135,7 +135,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){ | |||||
| 0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b, | 0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b, | ||||
| 0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f, | 0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f, | ||||
| }; | }; | ||||
| u_int64_t permute_table2[] = { | |||||
| uint64_t permute_table2[] = { | |||||
| 0x00, 0x01, 0x02, 0x03, 8|0x0, 8|0x1, 8|0x2, 8|0x3, | 0x00, 0x01, 0x02, 0x03, 8|0x0, 8|0x1, 8|0x2, 8|0x3, | ||||
| 0x04, 0x05, 0x06, 0x07, 8|0x4, 8|0x5, 8|0x6, 8|0x7, | 0x04, 0x05, 0x06, 0x07, 8|0x4, 8|0x5, 8|0x6, 8|0x7, | ||||
| }; | }; | ||||
| @@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | ||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) | |||||
| #include <immintrin.h> | #include <immintrin.h> | ||||
| #include "common.h" | #include "common.h" | ||||
| @@ -47,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| _mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N) | _mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N) | ||||
| #define MASK_STORE_512(M, N) \ | #define MASK_STORE_512(M, N) \ | ||||
| result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | ||||
| asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \ | |||||
| asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \ | |||||
| _mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N) | _mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N) | ||||
| #endif | #endif | ||||
| @@ -266,7 +267,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||||
| int mm = M - i; | int mm = M - i; | ||||
| if (!mm) return 0; | if (!mm) return 0; | ||||
| if (mm > 8 || K < 32) { | if (mm > 8 || K < 32) { | ||||
| register __mmask16 mask asm("k1") = (1UL << mm) - 1; | |||||
| register __mmask16 mask = (1UL << mm) - 1; | |||||
| for (j = 0; j < n6; j += 6) { | for (j = 0; j < n6; j += 6) { | ||||
| DECLARE_RESULT_512(0, 0); | DECLARE_RESULT_512(0, 0); | ||||
| DECLARE_RESULT_512(0, 1); | DECLARE_RESULT_512(0, 1); | ||||
| @@ -610,3 +611,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| #else | |||||
| #include "../generic/gemm_small_matrix_kernel_nn.c" | |||||
| #endif | |||||
| @@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| _mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N) | _mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N) | ||||
| #define MASK_STORE_512(M, N) \ | #define MASK_STORE_512(M, N) \ | ||||
| result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | ||||
| asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \ | |||||
| asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \ | |||||
| _mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N) | _mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N) | ||||
| #define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | #define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | ||||
| __m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \ | __m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \ | ||||
| @@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||||
| } | } | ||||
| int mm = M - i; | int mm = M - i; | ||||
| if (mm >= 12) { | if (mm >= 12) { | ||||
| register __mmask16 mask asm("k1") = (1UL << mm) - 1; | |||||
| register __mmask16 mask = (1UL << mm) - 1; | |||||
| for (j = 0; j < n8; j += 8) { | for (j = 0; j < n8; j += 8) { | ||||
| DECLARE_RESULT_512(0, 0); | DECLARE_RESULT_512(0, 0); | ||||
| DECLARE_RESULT_512(0, 1); | DECLARE_RESULT_512(0, 1); | ||||
| @@ -24,6 +24,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | ||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9)) | |||||
| #include <immintrin.h> | #include <immintrin.h> | ||||
| #include "common.h" | #include "common.h" | ||||
| @@ -314,3 +315,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| #else | |||||
| #include "../generic/gemm_small_matrix_kernel_tn.c" | |||||
| #endif | |||||
| @@ -452,11 +452,6 @@ | |||||
| MOVDDUP(4 * SIZE, A1, a1) | MOVDDUP(4 * SIZE, A1, a1) | ||||
| movsd 0 * SIZE(YY), yy1 | |||||
| movhpd 1 * SIZE(YY), yy1 | |||||
| movsd 2 * SIZE(YY), yy2 | |||||
| movhpd 3 * SIZE(YY), yy2 | |||||
| movapd 8 * SIZE(XX), xtemp1 | movapd 8 * SIZE(XX), xtemp1 | ||||
| movapd 10 * SIZE(XX), xtemp2 | movapd 10 * SIZE(XX), xtemp2 | ||||
| movapd 12 * SIZE(XX), xtemp3 | movapd 12 * SIZE(XX), xtemp3 | ||||
| @@ -475,6 +470,12 @@ | |||||
| MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) | MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) | ||||
| ALIGN_3 | ALIGN_3 | ||||
| .L12_prep: | |||||
| movsd 0 * SIZE(YY), yy1 | |||||
| movhpd 1 * SIZE(YY), yy1 | |||||
| movsd 2 * SIZE(YY), yy2 | |||||
| movhpd 3 * SIZE(YY), yy2 | |||||
| .L12: | .L12: | ||||
| movapd xtemp1, xt1 | movapd xtemp1, xt1 | ||||
| mulpd a1, xt1 | mulpd a1, xt1 | ||||
| @@ -608,8 +609,6 @@ | |||||
| movlpd yy2, 6 * SIZE(YY) | movlpd yy2, 6 * SIZE(YY) | ||||
| movhpd yy2, 7 * SIZE(YY) | movhpd yy2, 7 * SIZE(YY) | ||||
| movsd 10 * SIZE(YY), yy2 | |||||
| movhpd 11 * SIZE(YY), yy2 | |||||
| movapd xtemp2, xt1 | movapd xtemp2, xt1 | ||||
| movapd 18 * SIZE(XX), xtemp2 | movapd 18 * SIZE(XX), xtemp2 | ||||
| @@ -621,8 +620,6 @@ | |||||
| movlpd yy1, 4 * SIZE(YY) | movlpd yy1, 4 * SIZE(YY) | ||||
| movhpd yy1, 5 * SIZE(YY) | movhpd yy1, 5 * SIZE(YY) | ||||
| movsd 8 * SIZE(YY), yy1 | |||||
| movhpd 9 * SIZE(YY), yy1 | |||||
| subq $-16 * SIZE, XX | subq $-16 * SIZE, XX | ||||
| addq $ 8 * SIZE, YY | addq $ 8 * SIZE, YY | ||||
| @@ -630,7 +627,8 @@ | |||||
| addq $ 8 * SIZE, A2 | addq $ 8 * SIZE, A2 | ||||
| decq I | decq I | ||||
| jg .L12 | |||||
| jg .L12_prep | |||||
| jmp .L15 | |||||
| ALIGN_3 | ALIGN_3 | ||||
| .L14: | .L14: | ||||
| @@ -641,7 +639,6 @@ | |||||
| jle .L16 | jle .L16 | ||||
| MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) | MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) | ||||
| jmp .L15_pastcheck | |||||
| .L15: | .L15: | ||||
| movq M, I | movq M, I | ||||
| @@ -650,6 +647,11 @@ | |||||
| testq $2, I | testq $2, I | ||||
| jle .L16 | jle .L16 | ||||
| movsd 0 * SIZE(YY), yy1 | |||||
| movhpd 1 * SIZE(YY), yy1 | |||||
| movsd 2 * SIZE(YY), yy2 | |||||
| movhpd 3 * SIZE(YY), yy2 | |||||
| .L15_pastcheck: | .L15_pastcheck: | ||||
| movapd xtemp1, xt1 | movapd xtemp1, xt1 | ||||
| mulpd a1, xt1 | mulpd a1, xt1 | ||||
| @@ -705,8 +707,6 @@ | |||||
| movlpd yy2, 2 * SIZE(YY) | movlpd yy2, 2 * SIZE(YY) | ||||
| movhpd yy2, 3 * SIZE(YY) | movhpd yy2, 3 * SIZE(YY) | ||||
| movsd 6 * SIZE(YY), yy2 | |||||
| movhpd 7 * SIZE(YY), yy2 | |||||
| movapd xtemp2, xt1 | movapd xtemp2, xt1 | ||||
| movapd 10 * SIZE(XX), xtemp2 | movapd 10 * SIZE(XX), xtemp2 | ||||
| @@ -717,8 +717,6 @@ | |||||
| movlpd yy1, 0 * SIZE(YY) | movlpd yy1, 0 * SIZE(YY) | ||||
| movhpd yy1, 1 * SIZE(YY) | movhpd yy1, 1 * SIZE(YY) | ||||
| movsd 4 * SIZE(YY), yy1 | |||||
| movhpd 5 * SIZE(YY), yy1 | |||||
| addq $4 * SIZE, YY | addq $4 * SIZE, YY | ||||
| addq $4 * SIZE, A1 | addq $4 * SIZE, A1 | ||||
| @@ -731,6 +729,9 @@ | |||||
| MOVDDUP(1 * SIZE, A1, a2) | MOVDDUP(1 * SIZE, A1, a2) | ||||
| movsd 0 * SIZE(YY), yy1 | |||||
| movhpd 1 * SIZE(YY), yy1 | |||||
| movapd xtemp1, xt1 | movapd xtemp1, xt1 | ||||
| mulpd a1, xt1 | mulpd a1, xt1 | ||||
| mulpd atemp1, a1 | mulpd atemp1, a1 | ||||
| @@ -2,9 +2,9 @@ add_subdirectory(SRC) | |||||
| if(BUILD_TESTING) | if(BUILD_TESTING) | ||||
| add_subdirectory(TESTING) | add_subdirectory(TESTING) | ||||
| endif() | endif() | ||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/blas.pc @ONLY) | |||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc @ONLY) | |||||
| install(FILES | install(FILES | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/blas.pc | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc | |||||
| DESTINATION ${PKG_CONFIG_DIR} | DESTINATION ${PKG_CONFIG_DIR} | ||||
| COMPONENT Development | COMPONENT Development | ||||
| ) | ) | ||||
| @@ -97,10 +97,10 @@ if(BUILD_COMPLEX16) | |||||
| endif() | endif() | ||||
| list(REMOVE_DUPLICATES SOURCES) | list(REMOVE_DUPLICATES SOURCES) | ||||
| add_library(blas ${SOURCES}) | |||||
| add_library(${BLASLIB} ${SOURCES}) | |||||
| set_target_properties( | set_target_properties( | ||||
| blas PROPERTIES | |||||
| ${BLASLIB} PROPERTIES | |||||
| VERSION ${LAPACK_VERSION} | VERSION ${LAPACK_VERSION} | ||||
| SOVERSION ${LAPACK_MAJOR_VERSION} | SOVERSION ${LAPACK_MAJOR_VERSION} | ||||
| ) | ) | ||||
| lapack_install_library(blas) | |||||
| lapack_install_library(${BLASLIB}) | |||||
| @@ -2,7 +2,7 @@ macro(add_blas_test name src) | |||||
| get_filename_component(baseNAME ${src} NAME_WE) | get_filename_component(baseNAME ${src} NAME_WE) | ||||
| set(TEST_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${baseNAME}.in") | set(TEST_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${baseNAME}.in") | ||||
| add_executable(${name} ${src}) | add_executable(${name} ${src}) | ||||
| target_link_libraries(${name} blas) | |||||
| target_link_libraries(${name} ${BLASLIB}) | |||||
| if(EXISTS "${TEST_INPUT}") | if(EXISTS "${TEST_INPUT}") | ||||
| add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}" | add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}" | ||||
| -DTEST=$<TARGET_FILE:${name}> | -DTEST=$<TARGET_FILE:${name}> | ||||
| @@ -5,4 +5,4 @@ Name: BLAS | |||||
| Description: FORTRAN reference implementation of BLAS Basic Linear Algebra Subprograms | Description: FORTRAN reference implementation of BLAS Basic Linear Algebra Subprograms | ||||
| Version: @LAPACK_VERSION@ | Version: @LAPACK_VERSION@ | ||||
| URL: http://www.netlib.org/blas/ | URL: http://www.netlib.org/blas/ | ||||
| Libs: -L${libdir} -lblas | |||||
| Libs: -L${libdir} -l@BLASLIB@ | |||||
| @@ -1,7 +1,7 @@ | |||||
| message(STATUS "CBLAS enable") | message(STATUS "CBLAS enable") | ||||
| enable_language(C) | enable_language(C) | ||||
| set(LAPACK_INSTALL_EXPORT_NAME cblas-targets) | |||||
| set(LAPACK_INSTALL_EXPORT_NAME ${CBLASLIB}-targets) | |||||
| # Create a header file cblas.h for the routines called in my C programs | # Create a header file cblas.h for the routines called in my C programs | ||||
| include(FortranCInterface) | include(FortranCInterface) | ||||
| @@ -42,15 +42,15 @@ if(BUILD_TESTING) | |||||
| endif() | endif() | ||||
| if(NOT BLAS_FOUND) | if(NOT BLAS_FOUND) | ||||
| set(ALL_TARGETS ${ALL_TARGETS} blas) | |||||
| set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB}) | |||||
| endif() | endif() | ||||
| # Export cblas targets from the | # Export cblas targets from the | ||||
| # install tree, if any. | # install tree, if any. | ||||
| set(_cblas_config_install_guard_target "") | set(_cblas_config_install_guard_target "") | ||||
| if(ALL_TARGETS) | if(ALL_TARGETS) | ||||
| install(EXPORT cblas-targets | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||||
| install(EXPORT ${CBLASLIB}-targets | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION} | |||||
| COMPONENT Development | COMPONENT Development | ||||
| ) | ) | ||||
| # Choose one of the cblas targets to use as a guard for | # Choose one of the cblas targets to use as a guard for | ||||
| @@ -61,7 +61,7 @@ endif() | |||||
| # Export cblas targets from the build tree, if any. | # Export cblas targets from the build tree, if any. | ||||
| set(_cblas_config_build_guard_target "") | set(_cblas_config_build_guard_target "") | ||||
| if(ALL_TARGETS) | if(ALL_TARGETS) | ||||
| export(TARGETS ${ALL_TARGETS} FILE cblas-targets.cmake) | |||||
| export(TARGETS ${ALL_TARGETS} FILE ${CBLASLIB}-targets.cmake) | |||||
| # Choose one of the cblas targets to use as a guard | # Choose one of the cblas targets to use as a guard | ||||
| # for cblas-config.cmake to load targets from the build tree. | # for cblas-config.cmake to load targets from the build tree. | ||||
| @@ -69,26 +69,26 @@ if(ALL_TARGETS) | |||||
| endif() | endif() | ||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-version.cmake.in | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-version.cmake.in | ||||
| ${LAPACK_BINARY_DIR}/cblas-config-version.cmake @ONLY) | |||||
| ${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake @ONLY) | |||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-build.cmake.in | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-build.cmake.in | ||||
| ${LAPACK_BINARY_DIR}/cblas-config.cmake @ONLY) | |||||
| ${LAPACK_BINARY_DIR}/${CBLASLIB}-config.cmake @ONLY) | |||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc @ONLY) | |||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc @ONLY) | |||||
| install(FILES | install(FILES | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc | |||||
| DESTINATION ${PKG_CONFIG_DIR} | DESTINATION ${PKG_CONFIG_DIR} | ||||
| ) | ) | ||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake @ONLY) | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake @ONLY) | |||||
| install(FILES | install(FILES | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake | |||||
| ${LAPACK_BINARY_DIR}/cblas-config-version.cmake | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake | |||||
| ${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION} | |||||
| ) | ) | ||||
| #install(EXPORT cblas-targets | |||||
| # DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||||
| #install(EXPORT ${CBLASLIB}-targets | |||||
| # DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION} | |||||
| # COMPONENT Development | # COMPONENT Development | ||||
| # ) | # ) | ||||
| @@ -5,6 +5,6 @@ Name: CBLAS | |||||
| Description: C Standard Interface to BLAS Basic Linear Algebra Subprograms | Description: C Standard Interface to BLAS Basic Linear Algebra Subprograms | ||||
| Version: @LAPACK_VERSION@ | Version: @LAPACK_VERSION@ | ||||
| URL: http://www.netlib.org/blas/#_cblas | URL: http://www.netlib.org/blas/#_cblas | ||||
| Libs: -L${libdir} -lcblas | |||||
| Libs: -L${libdir} -l@CBLASLIB@ | |||||
| Cflags: -I${includedir} | Cflags: -I${includedir} | ||||
| Requires.private: blas | |||||
| Requires.private: @BLASLIB@ | |||||
| @@ -4,11 +4,11 @@ find_package(LAPACK NO_MODULE) | |||||
| # Load lapack targets from the build tree, including lapacke targets. | # Load lapack targets from the build tree, including lapacke targets. | ||||
| if(NOT TARGET lapacke) | if(NOT TARGET lapacke) | ||||
| include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||||
| include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake") | |||||
| endif() | endif() | ||||
| # Report cblas header search locations from build tree. | # Report cblas header search locations from build tree. | ||||
| set(CBLAS_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | set(CBLAS_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | ||||
| # Report cblas libraries. | # Report cblas libraries. | ||||
| set(CBLAS_LIBRARIES cblas) | |||||
| set(CBLAS_LIBRARIES @CBLASLIB@) | |||||
| @@ -5,19 +5,19 @@ get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) | |||||
| get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) | get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) | ||||
| # Load the LAPACK package with which we were built. | # Load the LAPACK package with which we were built. | ||||
| set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@") | |||||
| set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@") | |||||
| find_package(LAPACK NO_MODULE) | find_package(LAPACK NO_MODULE) | ||||
| # Load cblas targets from the install tree. | # Load cblas targets from the install tree. | ||||
| if(NOT TARGET cblas) | |||||
| include(${_CBLAS_SELF_DIR}/cblas-targets.cmake) | |||||
| if(NOT TARGET @CBLASLIB@) | |||||
| include(${_CBLAS_SELF_DIR}/@CBLASLIB@-targets.cmake) | |||||
| endif() | endif() | ||||
| # Report cblas header search locations. | # Report cblas header search locations. | ||||
| set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include) | set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include) | ||||
| # Report cblas libraries. | # Report cblas libraries. | ||||
| set(CBLAS_LIBRARIES cblas) | |||||
| set(CBLAS_LIBRARIES @CBLASLIB@) | |||||
| unset(_CBLAS_PREFIX) | unset(_CBLAS_PREFIX) | ||||
| unset(_CBLAS_SELF_DIR) | unset(_CBLAS_SELF_DIR) | ||||
| @@ -1,8 +1,8 @@ | |||||
| add_executable(xexample1_CBLAS cblas_example1.c) | add_executable(xexample1_CBLAS cblas_example1.c) | ||||
| add_executable(xexample2_CBLAS cblas_example2.c) | add_executable(xexample2_CBLAS cblas_example2.c) | ||||
| target_link_libraries(xexample1_CBLAS cblas) | |||||
| target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES}) | |||||
| target_link_libraries(xexample1_CBLAS ${CBLASLIB}) | |||||
| target_link_libraries(xexample2_CBLAS ${CBLASLIB} ${BLAS_LIBRARIES}) | |||||
| add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS) | add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS) | ||||
| add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS) | add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS) | ||||
| @@ -11,7 +11,7 @@ int main ( ) | |||||
| double *a, *x, *y; | double *a, *x, *y; | ||||
| double alpha, beta; | double alpha, beta; | ||||
| int m, n, lda, incx, incy, i; | |||||
| CBLAS_INDEX m, n, lda, incx, incy, i; | |||||
| Layout = CblasColMajor; | Layout = CblasColMajor; | ||||
| transa = CblasNoTrans; | transa = CblasNoTrans; | ||||
| @@ -9,7 +9,7 @@ | |||||
| int main (int argc, char **argv ) | int main (int argc, char **argv ) | ||||
| { | { | ||||
| int rout=-1,info=0,m,n,k,lda,ldb,ldc; | |||||
| CBLAS_INDEX rout=-1,info=0,m,n,k,lda,ldb,ldc; | |||||
| double A[2] = {0.0,0.0}, | double A[2] = {0.0,0.0}, | ||||
| B[2] = {0.0,0.0}, | B[2] = {0.0,0.0}, | ||||
| C[2] = {0.0,0.0}, | C[2] = {0.0,0.0}, | ||||
| @@ -1,6 +1,7 @@ | |||||
| #ifndef CBLAS_H | #ifndef CBLAS_H | ||||
| #define CBLAS_H | #define CBLAS_H | ||||
| #include <stddef.h> | #include <stddef.h> | ||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| @@ -11,9 +12,9 @@ extern "C" { /* Assume C declarations for C++ */ | |||||
| * Enumerated and derived types | * Enumerated and derived types | ||||
| */ | */ | ||||
| #ifdef WeirdNEC | #ifdef WeirdNEC | ||||
| #define CBLAS_INDEX long | |||||
| #define CBLAS_INDEX int64_t | |||||
| #else | #else | ||||
| #define CBLAS_INDEX int | |||||
| #define CBLAS_INDEX int32_t | |||||
| #endif | #endif | ||||
| typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT; | typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT; | ||||
| @@ -9,6 +9,8 @@ | |||||
| #ifndef CBLAS_F77_H | #ifndef CBLAS_F77_H | ||||
| #define CBLAS_F77_H | #define CBLAS_F77_H | ||||
| #include <stdint.h> | |||||
| #ifdef CRAY | #ifdef CRAY | ||||
| #include <fortran.h> | #include <fortran.h> | ||||
| #define F77_CHAR _fcd | #define F77_CHAR _fcd | ||||
| @@ -17,8 +19,12 @@ | |||||
| #define F77_STRLEN(a) (_fcdlen) | #define F77_STRLEN(a) (_fcdlen) | ||||
| #endif | #endif | ||||
| #ifndef F77_INT | |||||
| #ifdef WeirdNEC | #ifdef WeirdNEC | ||||
| #define F77_INT long | |||||
| #define F77_INT int64_t | |||||
| #else | |||||
| #define F77_INT int32_t | |||||
| #endif | |||||
| #endif | #endif | ||||
| #ifdef F77_CHAR | #ifdef F77_CHAR | ||||
| @@ -113,16 +113,16 @@ if(BUILD_COMPLEX16) | |||||
| endif() | endif() | ||||
| list(REMOVE_DUPLICATES SOURCES) | list(REMOVE_DUPLICATES SOURCES) | ||||
| add_library(cblas ${SOURCES}) | |||||
| add_library(${CBLASLIB} ${SOURCES}) | |||||
| set_target_properties( | set_target_properties( | ||||
| cblas PROPERTIES | |||||
| ${CBLASLIB} PROPERTIES | |||||
| LINKER_LANGUAGE C | LINKER_LANGUAGE C | ||||
| VERSION ${LAPACK_VERSION} | VERSION ${LAPACK_VERSION} | ||||
| SOVERSION ${LAPACK_MAJOR_VERSION} | SOVERSION ${LAPACK_MAJOR_VERSION} | ||||
| ) | ) | ||||
| target_include_directories(cblas PUBLIC | |||||
| target_include_directories(${CBLASLIB} PUBLIC | |||||
| $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> | $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> | ||||
| $<INSTALL_INTERFACE:include> | $<INSTALL_INTERFACE:include> | ||||
| ) | ) | ||||
| target_link_libraries(cblas PRIVATE ${BLAS_LIBRARIES}) | |||||
| lapack_install_library(cblas) | |||||
| target_link_libraries(${CBLASLIB} PRIVATE ${BLAS_LIBRARIES}) | |||||
| lapack_install_library(${CBLASLIB}) | |||||
| @@ -52,9 +52,9 @@ if(BUILD_SINGLE) | |||||
| add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | ||||
| add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | ||||
| target_link_libraries(xscblat1 cblas) | |||||
| target_link_libraries(xscblat2 cblas) | |||||
| target_link_libraries(xscblat3 cblas) | |||||
| target_link_libraries(xscblat1 ${CBLASLIB}) | |||||
| target_link_libraries(xscblat2 ${CBLASLIB}) | |||||
| target_link_libraries(xscblat3 ${CBLASLIB}) | |||||
| add_cblas_test(stest1.out "" xscblat1) | add_cblas_test(stest1.out "" xscblat1) | ||||
| add_cblas_test(stest2.out sin2 xscblat2) | add_cblas_test(stest2.out sin2 xscblat2) | ||||
| @@ -66,9 +66,9 @@ if(BUILD_DOUBLE) | |||||
| add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | ||||
| add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | ||||
| target_link_libraries(xdcblat1 cblas) | |||||
| target_link_libraries(xdcblat2 cblas) | |||||
| target_link_libraries(xdcblat3 cblas) | |||||
| target_link_libraries(xdcblat1 ${CBLASLIB}) | |||||
| target_link_libraries(xdcblat2 ${CBLASLIB}) | |||||
| target_link_libraries(xdcblat3 ${CBLASLIB}) | |||||
| add_cblas_test(dtest1.out "" xdcblat1) | add_cblas_test(dtest1.out "" xdcblat1) | ||||
| add_cblas_test(dtest2.out din2 xdcblat2) | add_cblas_test(dtest2.out din2 xdcblat2) | ||||
| @@ -80,9 +80,9 @@ if(BUILD_COMPLEX) | |||||
| add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | ||||
| add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | ||||
| target_link_libraries(xccblat1 cblas ${BLAS_LIBRARIES}) | |||||
| target_link_libraries(xccblat2 cblas) | |||||
| target_link_libraries(xccblat3 cblas) | |||||
| target_link_libraries(xccblat1 ${CBLASLIB} ${BLAS_LIBRARIES}) | |||||
| target_link_libraries(xccblat2 ${CBLASLIB}) | |||||
| target_link_libraries(xccblat3 ${CBLASLIB}) | |||||
| add_cblas_test(ctest1.out "" xccblat1) | add_cblas_test(ctest1.out "" xccblat1) | ||||
| add_cblas_test(ctest2.out cin2 xccblat2) | add_cblas_test(ctest2.out cin2 xccblat2) | ||||
| @@ -94,9 +94,9 @@ if(BUILD_COMPLEX16) | |||||
| add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | ||||
| add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | ||||
| target_link_libraries(xzcblat1 cblas) | |||||
| target_link_libraries(xzcblat2 cblas) | |||||
| target_link_libraries(xzcblat3 cblas) | |||||
| target_link_libraries(xzcblat1 ${CBLASLIB}) | |||||
| target_link_libraries(xzcblat2 ${CBLASLIB}) | |||||
| target_link_libraries(xzcblat3 ${CBLASLIB}) | |||||
| add_cblas_test(ztest1.out "" xzcblat1) | add_cblas_test(ztest1.out "" xzcblat1) | ||||
| add_cblas_test(ztest2.out zin2 xzcblat2) | add_cblas_test(ztest2.out zin2 xzcblat2) | ||||
| @@ -14,6 +14,19 @@ macro( CheckLAPACKCompilerFlags ) | |||||
| set( FPE_EXIT FALSE ) | set( FPE_EXIT FALSE ) | ||||
| # FORTRAN ILP default | |||||
| if ( FORTRAN_ILP ) | |||||
| if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" ) | |||||
| if ( WIN32 ) | |||||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} /integer-size:64") | |||||
| else () | |||||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -integer-size 64") | |||||
| endif() | |||||
| else() | |||||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-integer-8") | |||||
| endif() | |||||
| endif() | |||||
| # GNU Fortran | # GNU Fortran | ||||
| if( CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" ) | if( CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" ) | ||||
| if( "${CMAKE_Fortran_FLAGS}" MATCHES "-ffpe-trap=[izoupd]") | if( "${CMAKE_Fortran_FLAGS}" MATCHES "-ffpe-trap=[izoupd]") | ||||
| @@ -1,7 +1,7 @@ | |||||
| # Load lapack targets from the build tree if necessary. | # Load lapack targets from the build tree if necessary. | ||||
| set(_LAPACK_TARGET "@_lapack_config_build_guard_target@") | set(_LAPACK_TARGET "@_lapack_config_build_guard_target@") | ||||
| if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | ||||
| include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||||
| include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake") | |||||
| endif() | endif() | ||||
| unset(_LAPACK_TARGET) | unset(_LAPACK_TARGET) | ||||
| @@ -4,7 +4,7 @@ get_filename_component(_LAPACK_SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) | |||||
| # Load lapack targets from the install tree if necessary. | # Load lapack targets from the install tree if necessary. | ||||
| set(_LAPACK_TARGET "@_lapack_config_install_guard_target@") | set(_LAPACK_TARGET "@_lapack_config_install_guard_target@") | ||||
| if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | ||||
| include("${_LAPACK_SELF_DIR}/lapack-targets.cmake") | |||||
| include("${_LAPACK_SELF_DIR}/@LAPACKLIB@-targets.cmake") | |||||
| endif() | endif() | ||||
| unset(_LAPACK_TARGET) | unset(_LAPACK_TARGET) | ||||
| @@ -44,6 +44,24 @@ endif() | |||||
| # By default static library | # By default static library | ||||
| option(BUILD_SHARED_LIBS "Build shared libraries" OFF) | option(BUILD_SHARED_LIBS "Build shared libraries" OFF) | ||||
| # By default build index32 library | |||||
| option(BUILD_INDEX64 "Build Index-64 API libraries" OFF) | |||||
| if(BUILD_INDEX64) | |||||
| set(BLASLIB "blas64") | |||||
| set(CBLASLIB "cblas64") | |||||
| set(LAPACKLIB "lapack64") | |||||
| set(LAPACKELIB "lapacke64") | |||||
| set(TMGLIB "tmglib64") | |||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWeirdNEC -DLAPACK_ILP64 -DHAVE_LAPACK_CONFIG_H") | |||||
| set(FORTRAN_ILP TRUE) | |||||
| else() | |||||
| set(BLASLIB "blas") | |||||
| set(CBLASLIB "cblas") | |||||
| set(LAPACKLIB "lapack") | |||||
| set(LAPACKELIB "lapacke") | |||||
| set(TMGLIB "tmglib") | |||||
| endif() | |||||
| include(GNUInstallDirs) | include(GNUInstallDirs) | ||||
| # Updated OSX RPATH settings | # Updated OSX RPATH settings | ||||
| @@ -73,10 +91,10 @@ include(PreventInBuildInstalls) | |||||
| if(UNIX) | if(UNIX) | ||||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel) | if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel) | ||||
| list(APPEND CMAKE_Fortran_FLAGS "-fp-model strict") | |||||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict") | |||||
| endif() | endif() | ||||
| if(CMAKE_Fortran_COMPILER_ID STREQUAL XL) | if(CMAKE_Fortran_COMPILER_ID STREQUAL XL) | ||||
| list(APPEND CMAKE_Fortran_FLAGS "-qnosave -qstrict=none") | |||||
| set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none") | |||||
| endif() | endif() | ||||
| # Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler. | # Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler. | ||||
| # This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin | # This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin | ||||
| @@ -112,7 +130,7 @@ endif() | |||||
| # -------------------------------------------------- | # -------------------------------------------------- | ||||
| set(LAPACK_INSTALL_EXPORT_NAME lapack-targets) | |||||
| set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKLIB}-targets) | |||||
| macro(lapack_install_library lib) | macro(lapack_install_library lib) | ||||
| install(TARGETS ${lib} | install(TARGETS ${lib} | ||||
| @@ -220,7 +238,7 @@ endif() | |||||
| if(NOT BLAS_FOUND) | if(NOT BLAS_FOUND) | ||||
| message(STATUS "Using supplied NETLIB BLAS implementation") | message(STATUS "Using supplied NETLIB BLAS implementation") | ||||
| add_subdirectory(BLAS) | add_subdirectory(BLAS) | ||||
| set(BLAS_LIBRARIES blas) | |||||
| set(BLAS_LIBRARIES ${BLASLIB}) | |||||
| else() | else() | ||||
| set(CMAKE_EXE_LINKER_FLAGS | set(CMAKE_EXE_LINKER_FLAGS | ||||
| "${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}" | "${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}" | ||||
| @@ -279,7 +297,7 @@ endif() | |||||
| # Neither user specified or optimized LAPACK libraries can be used | # Neither user specified or optimized LAPACK libraries can be used | ||||
| if(NOT LATESTLAPACK_FOUND) | if(NOT LATESTLAPACK_FOUND) | ||||
| message(STATUS "Using supplied NETLIB LAPACK implementation") | message(STATUS "Using supplied NETLIB LAPACK implementation") | ||||
| set(LAPACK_LIBRARIES lapack) | |||||
| set(LAPACK_LIBRARIES ${LAPACKLIB}) | |||||
| add_subdirectory(SRC) | add_subdirectory(SRC) | ||||
| else() | else() | ||||
| set(CMAKE_EXE_LINKER_FLAGS | set(CMAKE_EXE_LINKER_FLAGS | ||||
| @@ -371,23 +389,23 @@ include(CPack) | |||||
| # -------------------------------------------------- | # -------------------------------------------------- | ||||
| if(NOT BLAS_FOUND) | if(NOT BLAS_FOUND) | ||||
| set(ALL_TARGETS ${ALL_TARGETS} blas) | |||||
| set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB}) | |||||
| endif() | endif() | ||||
| if(NOT LATESTLAPACK_FOUND) | if(NOT LATESTLAPACK_FOUND) | ||||
| set(ALL_TARGETS ${ALL_TARGETS} lapack) | |||||
| set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKLIB}) | |||||
| endif() | endif() | ||||
| if(BUILD_TESTING OR LAPACKE_WITH_TMG) | if(BUILD_TESTING OR LAPACKE_WITH_TMG) | ||||
| set(ALL_TARGETS ${ALL_TARGETS} tmglib) | |||||
| set(ALL_TARGETS ${ALL_TARGETS} ${TMGLIB}) | |||||
| endif() | endif() | ||||
| # Export lapack targets, not including lapacke, from the | # Export lapack targets, not including lapacke, from the | ||||
| # install tree, if any. | # install tree, if any. | ||||
| set(_lapack_config_install_guard_target "") | set(_lapack_config_install_guard_target "") | ||||
| if(ALL_TARGETS) | if(ALL_TARGETS) | ||||
| install(EXPORT lapack-targets | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||||
| install(EXPORT ${LAPACKLIB}-targets | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION} | |||||
| COMPONENT Development | COMPONENT Development | ||||
| ) | ) | ||||
| @@ -398,18 +416,18 @@ endif() | |||||
| # Include cblas in targets exported from the build tree. | # Include cblas in targets exported from the build tree. | ||||
| if(CBLAS) | if(CBLAS) | ||||
| set(ALL_TARGETS ${ALL_TARGETS} cblas) | |||||
| set(ALL_TARGETS ${ALL_TARGETS} ${CBLASLIB}) | |||||
| endif() | endif() | ||||
| # Include lapacke in targets exported from the build tree. | # Include lapacke in targets exported from the build tree. | ||||
| if(LAPACKE) | if(LAPACKE) | ||||
| set(ALL_TARGETS ${ALL_TARGETS} lapacke) | |||||
| set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKELIB}) | |||||
| endif() | endif() | ||||
| # Export lapack and lapacke targets from the build tree, if any. | # Export lapack and lapacke targets from the build tree, if any. | ||||
| set(_lapack_config_build_guard_target "") | set(_lapack_config_build_guard_target "") | ||||
| if(ALL_TARGETS) | if(ALL_TARGETS) | ||||
| export(TARGETS ${ALL_TARGETS} FILE lapack-targets.cmake) | |||||
| export(TARGETS ${ALL_TARGETS} FILE ${LAPACKLIB}-targets.cmake) | |||||
| # Choose one of the lapack or lapacke targets to use as a guard | # Choose one of the lapack or lapacke targets to use as a guard | ||||
| # for lapack-config.cmake to load targets from the build tree. | # for lapack-config.cmake to load targets from the build tree. | ||||
| @@ -417,30 +435,30 @@ if(ALL_TARGETS) | |||||
| endif() | endif() | ||||
| configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-build.cmake.in | configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-build.cmake.in | ||||
| ${LAPACK_BINARY_DIR}/lapack-config.cmake @ONLY) | |||||
| ${LAPACK_BINARY_DIR}/${LAPACKLIB}-config.cmake @ONLY) | |||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc @ONLY) | |||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc @ONLY) | |||||
| install(FILES | install(FILES | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc | |||||
| DESTINATION ${PKG_CONFIG_DIR} | DESTINATION ${PKG_CONFIG_DIR} | ||||
| COMPONENT Development | COMPONENT Development | ||||
| ) | ) | ||||
| configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in | configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in | ||||
| ${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake @ONLY) | |||||
| ${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake @ONLY) | |||||
| include(CMakePackageConfigHelpers) | include(CMakePackageConfigHelpers) | ||||
| write_basic_package_version_file( | write_basic_package_version_file( | ||||
| ${LAPACK_BINARY_DIR}/lapack-config-version.cmake | |||||
| ${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake | |||||
| VERSION ${LAPACK_VERSION} | VERSION ${LAPACK_VERSION} | ||||
| COMPATIBILITY SameMajorVersion | COMPATIBILITY SameMajorVersion | ||||
| ) | ) | ||||
| install(FILES | install(FILES | ||||
| ${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake | |||||
| ${LAPACK_BINARY_DIR}/lapack-config-version.cmake | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||||
| ${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake | |||||
| ${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION} | |||||
| COMPONENT Development | COMPONENT Development | ||||
| ) | ) | ||||
| @@ -1,7 +1,7 @@ | |||||
| message(STATUS "LAPACKE enable") | message(STATUS "LAPACKE enable") | ||||
| enable_language(C) | enable_language(C) | ||||
| set(LAPACK_INSTALL_EXPORT_NAME lapacke-targets) | |||||
| set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKELIB}-targets) | |||||
| # Create a header file lapacke_mangling.h for the routines called in my C programs | # Create a header file lapacke_mangling.h for the routines called in my C programs | ||||
| include(FortranCInterface) | include(FortranCInterface) | ||||
| @@ -72,28 +72,28 @@ if(LAPACKE_WITH_TMG) | |||||
| endif() | endif() | ||||
| list(APPEND SOURCES ${UTILS}) | list(APPEND SOURCES ${UTILS}) | ||||
| add_library(lapacke ${SOURCES}) | |||||
| add_library(${LAPACKELIB} ${SOURCES}) | |||||
| set_target_properties( | set_target_properties( | ||||
| lapacke PROPERTIES | |||||
| ${LAPACKELIB} PROPERTIES | |||||
| LINKER_LANGUAGE C | LINKER_LANGUAGE C | ||||
| VERSION ${LAPACK_VERSION} | VERSION ${LAPACK_VERSION} | ||||
| SOVERSION ${LAPACK_MAJOR_VERSION} | SOVERSION ${LAPACK_MAJOR_VERSION} | ||||
| ) | ) | ||||
| target_include_directories(lapacke PUBLIC | |||||
| target_include_directories(${LAPACKELIB} PUBLIC | |||||
| $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> | $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> | ||||
| $<INSTALL_INTERFACE:include> | $<INSTALL_INTERFACE:include> | ||||
| ) | ) | ||||
| if(WIN32 AND NOT UNIX) | if(WIN32 AND NOT UNIX) | ||||
| target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE) | |||||
| target_compile_definitions(${LAPACKELIB} PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE) | |||||
| message(STATUS "Windows BUILD") | message(STATUS "Windows BUILD") | ||||
| endif() | endif() | ||||
| if(LAPACKE_WITH_TMG) | if(LAPACKE_WITH_TMG) | ||||
| target_link_libraries(lapacke PRIVATE tmglib) | |||||
| target_link_libraries(${LAPACKELIB} PRIVATE ${TMGLIB}) | |||||
| endif() | endif() | ||||
| target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES}) | |||||
| target_link_libraries(${LAPACKELIB} PRIVATE ${LAPACK_LIBRARIES}) | |||||
| lapack_install_library(lapacke) | |||||
| lapack_install_library(${LAPACKELIB}) | |||||
| install( | install( | ||||
| FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h | FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h | ||||
| DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | ||||
| @@ -105,28 +105,28 @@ if(BUILD_TESTING) | |||||
| endif() | endif() | ||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc @ONLY) | |||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc @ONLY) | |||||
| install(FILES | install(FILES | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc | |||||
| DESTINATION ${PKG_CONFIG_DIR} | DESTINATION ${PKG_CONFIG_DIR} | ||||
| COMPONENT Development | COMPONENT Development | ||||
| ) | ) | ||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in | ||||
| ${LAPACK_BINARY_DIR}/lapacke-config-version.cmake @ONLY) | |||||
| ${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake @ONLY) | |||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-build.cmake.in | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-build.cmake.in | ||||
| ${LAPACK_BINARY_DIR}/lapacke-config.cmake @ONLY) | |||||
| ${LAPACK_BINARY_DIR}/${LAPACKELIB}-config.cmake @ONLY) | |||||
| configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-install.cmake.in | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-install.cmake.in | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake @ONLY) | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake @ONLY) | |||||
| install(FILES | install(FILES | ||||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake | |||||
| ${LAPACK_BINARY_DIR}/lapacke-config-version.cmake | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake | |||||
| ${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION} | |||||
| COMPONENT Development | COMPONENT Development | ||||
| ) | ) | ||||
| install(EXPORT lapacke-targets | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||||
| install(EXPORT ${LAPACKELIB}-targets | |||||
| DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION} | |||||
| COMPONENT Development | COMPONENT Development | ||||
| ) | ) | ||||
| @@ -3,8 +3,8 @@ set(LAPACK_DIR "@LAPACK_BINARY_DIR@") | |||||
| find_package(LAPACK NO_MODULE) | find_package(LAPACK NO_MODULE) | ||||
| # Load lapack targets from the build tree, including lapacke targets. | # Load lapack targets from the build tree, including lapacke targets. | ||||
| if(NOT TARGET lapacke) | |||||
| include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||||
| if(NOT TARGET @LAPACKELIB@) | |||||
| include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake") | |||||
| endif() | endif() | ||||
| # Hint for project building against lapack | # Hint for project building against lapack | ||||
| @@ -14,4 +14,4 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||||
| set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | ||||
| # Report lapacke libraries. | # Report lapacke libraries. | ||||
| set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||||
| set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES}) | |||||
| @@ -5,12 +5,12 @@ get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH) | |||||
| get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH) | get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH) | ||||
| # Load the LAPACK package with which we were built. | # Load the LAPACK package with which we were built. | ||||
| set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@") | |||||
| # LAPACK's package files are installed under cmake/@LAPACKLIB@-@LAPACK_VERSION@ (lapack or lapack64), so point LAPACK_DIR at that same directory. | |||||
| set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@") | |||||
| find_package(LAPACK NO_MODULE) | find_package(LAPACK NO_MODULE) | ||||
| # Load lapacke targets from the install tree. | # Load lapacke targets from the install tree. | ||||
| if(NOT TARGET lapacke) | |||||
| include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake) | |||||
| if(NOT TARGET @LAPACKELIB@) | |||||
| include(${_LAPACKE_SELF_DIR}/@LAPACKELIB@-targets.cmake) | |||||
| endif() | endif() | ||||
| # Hint for project building against lapack | # Hint for project building against lapack | ||||
| @@ -20,7 +20,7 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||||
| set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include) | set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include) | ||||
| # Report lapacke libraries. | # Report lapacke libraries. | ||||
| set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||||
| set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES}) | |||||
| unset(_LAPACKE_PREFIX) | unset(_LAPACKE_PREFIX) | ||||
| unset(_LAPACKE_SELF_DIR) | unset(_LAPACKE_SELF_DIR) | ||||
| @@ -3,10 +3,10 @@ add_executable(xexample_DGESV_colmajor example_DGESV_colmajor.c lapacke_example_ | |||||
| add_executable(xexample_DGELS_rowmajor example_DGELS_rowmajor.c lapacke_example_aux.c lapacke_example_aux.h) | add_executable(xexample_DGELS_rowmajor example_DGELS_rowmajor.c lapacke_example_aux.c lapacke_example_aux.h) | ||||
| add_executable(xexample_DGELS_colmajor example_DGELS_colmajor.c lapacke_example_aux.c lapacke_example_aux.h) | add_executable(xexample_DGELS_colmajor example_DGELS_colmajor.c lapacke_example_aux.c lapacke_example_aux.h) | ||||
| target_link_libraries(xexample_DGESV_rowmajor lapacke) | |||||
| target_link_libraries(xexample_DGESV_colmajor lapacke) | |||||
| target_link_libraries(xexample_DGELS_rowmajor lapacke) | |||||
| target_link_libraries(xexample_DGELS_colmajor lapacke) | |||||
| target_link_libraries(xexample_DGESV_rowmajor ${LAPACKELIB}) | |||||
| target_link_libraries(xexample_DGESV_colmajor ${LAPACKELIB}) | |||||
| target_link_libraries(xexample_DGELS_rowmajor ${LAPACKELIB}) | |||||
| target_link_libraries(xexample_DGELS_colmajor ${LAPACKELIB}) | |||||
| add_test(example_DGESV_rowmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_rowmajor) | add_test(example_DGESV_rowmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_rowmajor) | ||||
| add_test(example_DGESV_colmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_colmajor) | add_test(example_DGESV_colmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_colmajor) | ||||
| @@ -49,12 +49,13 @@ extern "C" { | |||||
| #endif /* __cplusplus */ | #endif /* __cplusplus */ | ||||
| #include <stdlib.h> | #include <stdlib.h> | ||||
| #include <stdint.h> | |||||
| #ifndef lapack_int | #ifndef lapack_int | ||||
| #if defined(LAPACK_ILP64) | #if defined(LAPACK_ILP64) | ||||
| #define lapack_int long | |||||
| #define lapack_int int64_t | |||||
| #else | #else | ||||
| #define lapack_int int | |||||
| #define lapack_int int32_t | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -67,7 +67,11 @@ extern "C" { | |||||
| void LAPACKE_xerbla( const char *name, lapack_int info ); | void LAPACKE_xerbla( const char *name, lapack_int info ); | ||||
| /* Compare two chars (case-insensitive) */ | /* Compare two chars (case-insensitive) */ | ||||
| lapack_logical LAPACKE_lsame( char ca, char cb ); | |||||
| lapack_logical LAPACKE_lsame( char ca, char cb ) | |||||
| #if defined __GNUC__ | |||||
| __attribute__((const)) | |||||
| #endif | |||||
| ; | |||||
| /* Functions to convert column-major to row-major 2d arrays and vice versa. */ | /* Functions to convert column-major to row-major 2d arrays and vice versa. */ | ||||
| void LAPACKE_cgb_trans( int matrix_layout, lapack_int m, lapack_int n, | void LAPACKE_cgb_trans( int matrix_layout, lapack_int m, lapack_int n, | ||||
| @@ -5,6 +5,6 @@ Name: LAPACKE | |||||
| Description: C Standard Interface to LAPACK Linear Algebra PACKage | Description: C Standard Interface to LAPACK Linear Algebra PACKage | ||||
| Version: @LAPACK_VERSION@ | Version: @LAPACK_VERSION@ | ||||
| URL: http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack | URL: http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack | ||||
| Libs: -L${libdir} -llapacke | |||||
| Libs: -L${libdir} -l@LAPACKELIB@ | |||||
| Cflags: -I${includedir} | Cflags: -I${includedir} | ||||
| Requires.private: lapack | |||||
| Requires.private: @LAPACKLIB@ | |||||
| @@ -500,21 +500,21 @@ if(BUILD_COMPLEX16) | |||||
| endif() | endif() | ||||
| list(REMOVE_DUPLICATES SOURCES) | list(REMOVE_DUPLICATES SOURCES) | ||||
| add_library(lapack ${SOURCES}) | |||||
| add_library(${LAPACKLIB} ${SOURCES}) | |||||
| set_target_properties( | set_target_properties( | ||||
| lapack PROPERTIES | |||||
| ${LAPACKLIB} PROPERTIES | |||||
| VERSION ${LAPACK_VERSION} | VERSION ${LAPACK_VERSION} | ||||
| SOVERSION ${LAPACK_MAJOR_VERSION} | SOVERSION ${LAPACK_MAJOR_VERSION} | ||||
| ) | ) | ||||
| if(USE_XBLAS) | if(USE_XBLAS) | ||||
| target_link_libraries(lapack PRIVATE ${XBLAS_LIBRARY}) | |||||
| target_link_libraries(${LAPACKLIB} PRIVATE ${XBLAS_LIBRARY}) | |||||
| endif() | endif() | ||||
| target_link_libraries(lapack PRIVATE ${BLAS_LIBRARIES}) | |||||
| target_link_libraries(${LAPACKLIB} PRIVATE ${BLAS_LIBRARIES}) | |||||
| if(_is_coverage_build) | if(_is_coverage_build) | ||||
| target_link_libraries(lapack PRIVATE gcov) | |||||
| add_coverage(lapack) | |||||
| target_link_libraries(${LAPACKLIB} PRIVATE gcov) | |||||
| add_coverage(${LAPACKLIB}) | |||||
| endif() | endif() | ||||
| lapack_install_library(lapack) | |||||
| lapack_install_library(${LAPACKLIB}) | |||||
| @@ -47,6 +47,6 @@ if(BUILD_COMPLEX16) | |||||
| endif() | endif() | ||||
| list(REMOVE_DUPLICATES SOURCES) | list(REMOVE_DUPLICATES SOURCES) | ||||
| add_library(tmglib ${SOURCES}) | |||||
| target_link_libraries(tmglib ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) | |||||
| lapack_install_library(tmglib) | |||||
| add_library(${TMGLIB} ${SOURCES}) | |||||
| target_link_libraries(${TMGLIB} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) | |||||
| lapack_install_library(${TMGLIB}) | |||||
| @@ -3128,9 +3128,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define SYMV_P 16 | #define SYMV_P 16 | ||||
| #if defined(CORTEXA57) || \ | |||||
| #if defined(CORTEXA57) || defined(CORTEXX1) || \ | |||||
| defined(CORTEXA72) || defined(CORTEXA73) || \ | defined(CORTEXA72) || defined(CORTEXA73) || \ | ||||
| defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) | |||||
| defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000) | |||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | #define SGEMM_DEFAULT_UNROLL_M 16 | ||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | #define SGEMM_DEFAULT_UNROLL_N 4 | ||||
| @@ -3147,7 +3147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| /*FIXME: this should be using the cache size, but there is currently no easy way to | /*FIXME: this should be using the cache size, but there is currently no easy way to | ||||
| query that on ARM. So if getarch counted more than 8 cores we simply assume the host | query that on ARM. So if getarch counted more than 8 cores we simply assume the host | ||||
| is a big desktop or server with abundant cache rather than a phone or embedded device */ | is a big desktop or server with abundant cache rather than a phone or embedded device */ | ||||
| #if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) | |||||
| #if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1) | |||||
| #define SGEMM_DEFAULT_P 512 | #define SGEMM_DEFAULT_P 512 | ||||
| #define DGEMM_DEFAULT_P 256 | #define DGEMM_DEFAULT_P 256 | ||||
| #define CGEMM_DEFAULT_P 256 | #define CGEMM_DEFAULT_P 256 | ||||
| @@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d | |||||
| #define CGEMM_DEFAULT_R 4096 | #define CGEMM_DEFAULT_R 4096 | ||||
| #define ZGEMM_DEFAULT_R 4096 | #define ZGEMM_DEFAULT_R 4096 | ||||
| #elif defined(ARMV8SVE) || defined(A64FX) | |||||
| #elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2) | |||||
| /* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". | /* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". | ||||
| Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */ | Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */ | ||||
| @@ -3423,8 +3423,8 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout | |||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | #define SGEMM_DEFAULT_UNROLL_M 16 | ||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | #define SGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define DGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 8 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 8 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 8 | #define CGEMM_DEFAULT_UNROLL_M 8 | ||||
| #define CGEMM_DEFAULT_UNROLL_N 4 | #define CGEMM_DEFAULT_UNROLL_N 4 | ||||
| @@ -115,7 +115,7 @@ | |||||
| #define INCLUDE_CTGSYL INCLUDE_XTGSYL | #define INCLUDE_CTGSYL INCLUDE_XTGSYL | ||||
| #define INCLUDE_ZTGSYL INCLUDE_XTGSYL | #define INCLUDE_ZTGSYL INCLUDE_XTGSYL | ||||
| #define INCLUDE_XGEMMT 0 | |||||
| #define INCLUDE_XGEMMT 1 | |||||
| #define INCLUDE_SGEMMT INCLUDE_XGEMMT | #define INCLUDE_SGEMMT INCLUDE_XGEMMT | ||||
| #define INCLUDE_DGEMMT INCLUDE_XGEMMT | #define INCLUDE_DGEMMT INCLUDE_XGEMMT | ||||
| #define INCLUDE_CGEMMT INCLUDE_XGEMMT | #define INCLUDE_CGEMMT INCLUDE_XGEMMT | ||||
| @@ -566,7 +566,8 @@ void LAPACK(sgemmt)( | |||||
| const float *B, const blasint *ldB, | const float *B, const blasint *ldB, | ||||
| const float *beta, float *C, const blasint *ldC | const float *beta, float *C, const blasint *ldC | ||||
| ) { | ) { | ||||
| RELAPACK_sgemmt(uplo, n, A, ldA, info); | |||||
| /* Forward the caller's ldC as the final argument; the previous code passed an uninitialized local blasint by value in its place. */ | |||||
| RELAPACK_sgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC); | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -578,7 +579,8 @@ void LAPACK(dgemmt)( | |||||
| const double *B, const blasint *ldB, | const double *B, const blasint *ldB, | ||||
| const double *beta, double *C, const blasint *ldC | const double *beta, double *C, const blasint *ldC | ||||
| ) { | ) { | ||||
| RELAPACK_dgemmt(uplo, n, A, ldA, info); | |||||
| /* Forward the caller's ldC as the final argument; the previous code passed an uninitialized local blasint by value in its place. */ | |||||
| RELAPACK_dgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC); | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -590,7 +592,8 @@ void LAPACK(cgemmt)( | |||||
| const float *B, const blasint *ldB, | const float *B, const blasint *ldB, | ||||
| const float *beta, float *C, const blasint *ldC | const float *beta, float *C, const blasint *ldC | ||||
| ) { | ) { | ||||
| RELAPACK_cgemmt(uplo, n, A, ldA, info); | |||||
| /* Forward the caller's ldC as the final argument; the previous code passed an uninitialized local blasint by value in its place. */ | |||||
| RELAPACK_cgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC); | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -602,6 +605,7 @@ void LAPACK(zgemmt)( | |||||
| const double *B, const blasint *ldB, | const double *B, const blasint *ldB, | ||||
| const double *beta, double *C, const blasint *ldC | const double *beta, double *C, const blasint *ldC | ||||
| ) { | ) { | ||||
| RELAPACK_zgemmt(uplo, n, A, ldA, info); | |||||
| /* Forward the caller's ldC as the final argument; the previous code passed an uninitialized local blasint by value in its place. */ | |||||
| RELAPACK_zgemmt(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC); | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -30,6 +30,10 @@ if(WIN32) | |||||
| FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1 | FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.ps1 | ||||
| "if (Test-Path $args[2]) { Remove-Item -Force $args[2] } \n" | "if (Test-Path $args[2]) { Remove-Item -Force $args[2] } \n" | ||||
| "$ErrorActionPreference = \"Stop\"\n" | "$ErrorActionPreference = \"Stop\"\n" | ||||
| "If ((Get-Content $args[1] | & file - | %{$_ -match \"BOM\"}) -contains $true) {\n" | |||||
| "echo 'Skipped due to wrong input encoding'\n" | |||||
| "exit 0\n" | |||||
| "}\n" | |||||
| "Get-Content $args[1] | & $args[0]\n" | "Get-Content $args[1] | & $args[0]\n" | ||||
| "If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n" | "If ((Get-Content $args[2] | %{$_ -match \"FATAL\"}) -contains $true) {\n" | ||||
| "echo Error\n" | "echo Error\n" | ||||