| @@ -1711,6 +1711,8 @@ ifndef NO_MSA | |||||
| export HAVE_MSA | export HAVE_MSA | ||||
| export MSA_FLAGS | export MSA_FLAGS | ||||
| endif | endif | ||||
| export HAVE_LSX | |||||
| export HAVE_LASX | |||||
| export KERNELDIR | export KERNELDIR | ||||
| export FUNCTION_PROFILE | export FUNCTION_PROFILE | ||||
| export TARGET_CORE | export TARGET_CORE | ||||
| @@ -182,6 +182,39 @@ if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then | |||||
| rm -rf "$tmpd" | rm -rf "$tmpd" | ||||
| fi | fi | ||||
| # Detect compiler support for the LoongArch LSX and LASX extensions by | |||||
| # building a one-instruction program with -mlsx / -mlasx respectively. | |||||
| # have_lsx / have_lasx become 1 only when the matching build succeeds. | |||||
| have_lsx=0 | |||||
| have_lasx=0 | |||||
| if [ "$architecture" = "loongarch64" ]; then | |||||
| # Probe only when the scratch directory was really created: with an empty | |||||
| # $tmpd the probe sources would be written to the filesystem root. | |||||
| if tmpd="$(mktemp -d)"; then | |||||
| tmplsx="$tmpd/lsx.c" | |||||
| codelsx='"vadd.b $vr0, $vr0, $vr0"' | |||||
| lsx_flags='-march=loongarch64 -mlsx -mabi=lp64d' | |||||
| printf "#include <lsxintrin.h>\n\n" > "$tmplsx" | |||||
| # main returns int so the probe also builds when $flags promotes warnings to errors. | |||||
| printf "int main(void){ __asm__ volatile(%s); return 0; }\n" "$codelsx" >> "$tmplsx" | |||||
| args="$lsx_flags -o $tmplsx.o $tmplsx" | |||||
| # $flags and $args stay unquoted on purpose so they split into separate options. | |||||
| if $compiler_name $flags $args >/dev/null 2>&1; then | |||||
| have_lsx=1 | |||||
| fi | |||||
| tmplasx="$tmpd/lasx.c" | |||||
| codelasx='"xvadd.b $xr0, $xr0, $xr0"' | |||||
| lasx_flags='-march=loongarch64 -mlasx -mabi=lp64d' | |||||
| printf "#include <lasxintrin.h>\n\n" > "$tmplasx" | |||||
| printf "int main(void){ __asm__ volatile(%s); return 0; }\n" "$codelasx" >> "$tmplasx" | |||||
| args="$lasx_flags -o $tmplasx.o $tmplasx" | |||||
| if $compiler_name $flags $args >/dev/null 2>&1; then | |||||
| have_lasx=1 | |||||
| fi | |||||
| # Removes the probe sources and any build outputs in one go. | |||||
| rm -rf "$tmpd" | |||||
| fi | |||||
| fi | |||||
| case "$data" in | case "$data" in | ||||
| *ARCH_X86_64*) architecture=x86_64 ;; | *ARCH_X86_64*) architecture=x86_64 ;; | ||||
| *ARCH_X86*) architecture=x86 ;; | *ARCH_X86*) architecture=x86 ;; | ||||
| @@ -383,6 +416,8 @@ done | |||||
| [ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n" | [ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n" | ||||
| [ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n" | [ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n" | ||||
| [ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n" | [ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n" | ||||
| [ "$have_lsx" -eq 1 ] && printf "HAVE_LSX=1\n" | |||||
| [ "$have_lasx" -eq 1 ] && printf "HAVE_LASX=1\n" | |||||
| } >> "$makefile" | } >> "$makefile" | ||||
| os=`echo "$os" | tr '[[:lower:]]' '[[:upper:]]'/ ` | os=`echo "$os" | tr '[[:lower:]]' '[[:upper:]]'/ ` | ||||
| @@ -397,6 +432,8 @@ compiler=`echo "$compiler" | tr '[[:lower:]]' '[[:upper:]]' ` | |||||
| [ "$binformat" = "bin64" ] && printf "#define __64BIT__\t1\n" | [ "$binformat" = "bin64" ] && printf "#define __64BIT__\t1\n" | ||||
| [ -n "$need_fu" ] && printf "#define FUNDERSCORE\t%s\n" "$need_fu" | [ -n "$need_fu" ] && printf "#define FUNDERSCORE\t%s\n" "$need_fu" | ||||
| [ "$have_msa" -eq 1 ] && printf "#define HAVE_MSA\t1\n" | [ "$have_msa" -eq 1 ] && printf "#define HAVE_MSA\t1\n" | ||||
| [ "$have_lsx" -eq 1 ] && printf "#define HAVE_LSX\t1\n" | |||||
| [ "$have_lasx" -eq 1 ] && printf "#define HAVE_LASX\t1\n" | |||||
| [ "$c11_atomics" -eq 1 ] && printf "#define HAVE_C11\t1\n" | [ "$c11_atomics" -eq 1 ] && printf "#define HAVE_C11\t1\n" | ||||
| } >> "$config" | } >> "$config" | ||||
| @@ -232,6 +232,47 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) { | |||||
| } | } | ||||
| } | } | ||||
| # Detect compiler support for the LoongArch LSX and LASX extensions by | |||||
| # building a one-instruction program with -mlsx / -mlasx respectively. | |||||
| # $have_lsx / $have_lasx become 1 only when the matching build succeeds. | |||||
| $have_lsx = 0; | |||||
| $have_lasx = 0; | |||||
| if ($architecture eq "loongarch64") { | |||||
| # File::Temp is loaded lazily so its absence only disables the probe. | |||||
| eval "use File::Temp qw(tempfile)"; | |||||
| if ($@) { | |||||
| warn "could not load PERL module File::Temp, so could not check LSX and LASX capability"; | |||||
| } else { | |||||
| # LSX probe. The File::Temp object is both the file handle and, when | |||||
| # interpolated into a string, the file name. | |||||
| $tmplsx = File::Temp->new( SUFFIX => '.c', UNLINK => 1 ); | |||||
| $codelsx = '"vadd.b $vr0, $vr0, $vr0"'; | |||||
| $lsx_flags = "-march=loongarch64 -mlsx -mabi=lp64d"; | |||||
| print $tmplsx "#include <lsxintrin.h>\n\n"; | |||||
| # main returns int so the probe also builds when $flags promotes warnings to errors. | |||||
| print $tmplsx "int main(void){ __asm__ volatile($codelsx); return 0; }\n"; | |||||
| # Make sure the source is on disk before the compiler reads it. | |||||
| $tmplsx->flush(); | |||||
| $args = "$lsx_flags -o $tmplsx.o $tmplsx"; | |||||
| # system() returns the wait status; 0 means the compiler exited successfully. | |||||
| $have_lsx = (system("$compiler_name $flags $args >/dev/null 2>/dev/null") == 0) ? 1 : 0; | |||||
| unlink("$tmplsx.o"); | |||||
| # LASX probe, same procedure with the LASX header, instruction and flag. | |||||
| $tmplasx = File::Temp->new( SUFFIX => '.c', UNLINK => 1 ); | |||||
| $codelasx = '"xvadd.b $xr0, $xr0, $xr0"'; | |||||
| $lasx_flags = "-march=loongarch64 -mlasx -mabi=lp64d"; | |||||
| print $tmplasx "#include <lasxintrin.h>\n\n"; | |||||
| print $tmplasx "int main(void){ __asm__ volatile($codelasx); return 0; }\n"; | |||||
| $tmplasx->flush(); | |||||
| $args = "$lasx_flags -o $tmplasx.o $tmplasx"; | |||||
| $have_lasx = (system("$compiler_name $flags $args >/dev/null 2>/dev/null") == 0) ? 1 : 0; | |||||
| unlink("$tmplasx.o"); | |||||
| } | |||||
| } | |||||
| $architecture = x86 if ($data =~ /ARCH_X86/); | $architecture = x86 if ($data =~ /ARCH_X86/); | ||||
| $architecture = x86_64 if ($data =~ /ARCH_X86_64/); | $architecture = x86_64 if ($data =~ /ARCH_X86_64/); | ||||
| $architecture = e2k if ($data =~ /ARCH_E2K/); | $architecture = e2k if ($data =~ /ARCH_E2K/); | ||||
| @@ -419,6 +460,8 @@ print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne | |||||
| print MAKEFILE "CROSS=1\n" if $cross != 0; | print MAKEFILE "CROSS=1\n" if $cross != 0; | ||||
| print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; | print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; | ||||
| print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1; | print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1; | ||||
| print MAKEFILE "HAVE_LSX=1\n" if $have_lsx eq 1; | |||||
| print MAKEFILE "HAVE_LASX=1\n" if $have_lasx eq 1; | |||||
| print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1; | print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1; | ||||
| print MAKEFILE "NO_RV64GV=1\n" if $no_rv64gv eq 1; | print MAKEFILE "NO_RV64GV=1\n" if $no_rv64gv eq 1; | ||||
| print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1; | print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1; | ||||
| @@ -436,6 +479,8 @@ print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32; | |||||
| print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64; | print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64; | ||||
| print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne ""; | print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne ""; | ||||
| print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1; | print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1; | ||||
| print CONFFILE "#define HAVE_LSX\t1\n" if $have_lsx eq 1; | |||||
| print CONFFILE "#define HAVE_LASX\t1\n" if $have_lasx eq 1; | |||||
| print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1; | print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1; | ||||
| @@ -1,3 +1,4 @@ | |||||
| ifdef HAVE_LASX | |||||
| DGEMMKERNEL = dgemm_kernel_16x4.S | DGEMMKERNEL = dgemm_kernel_16x4.S | ||||
| DGEMMINCOPY = dgemm_ncopy_16.S | DGEMMINCOPY = dgemm_ncopy_16.S | ||||
| DGEMMITCOPY = dgemm_tcopy_16.S | DGEMMITCOPY = dgemm_tcopy_16.S | ||||
| @@ -7,6 +8,7 @@ DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||
| endif | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | ||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | ||||
| @@ -2843,15 +2843,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define GEMM_DEFAULT_OFFSET_B 0 | #define GEMM_DEFAULT_OFFSET_B 0 | ||||
| #define GEMM_DEFAULT_ALIGN 0x0ffffUL | #define GEMM_DEFAULT_ALIGN 0x0ffffUL | ||||
| #define SGEMM_DEFAULT_UNROLL_N 8 | |||||
| #if defined(HAVE_LASX) | |||||
| #define DGEMM_DEFAULT_UNROLL_N 4 | #define DGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define DGEMM_DEFAULT_UNROLL_M 16 | |||||
| #else | |||||
| #define DGEMM_DEFAULT_UNROLL_N 8 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||||
| #endif | |||||
| #define SGEMM_DEFAULT_UNROLL_N 8 | |||||
| #define QGEMM_DEFAULT_UNROLL_N 2 | #define QGEMM_DEFAULT_UNROLL_N 2 | ||||
| #define CGEMM_DEFAULT_UNROLL_N 4 | #define CGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | #define ZGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define XGEMM_DEFAULT_UNROLL_N 1 | #define XGEMM_DEFAULT_UNROLL_N 1 | ||||
| #define SGEMM_DEFAULT_UNROLL_M 2 | #define SGEMM_DEFAULT_UNROLL_M 2 | ||||
| #define DGEMM_DEFAULT_UNROLL_M 16 | |||||
| #define QGEMM_DEFAULT_UNROLL_M 2 | #define QGEMM_DEFAULT_UNROLL_M 2 | ||||
| #define CGEMM_DEFAULT_UNROLL_M 1 | #define CGEMM_DEFAULT_UNROLL_M 1 | ||||
| #define ZGEMM_DEFAULT_UNROLL_M 1 | #define ZGEMM_DEFAULT_UNROLL_M 1 | ||||