| @@ -336,14 +336,14 @@ ifeq ($(ARCH), x86) | |||
| DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | |||
| CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
| ifneq ($(NO_AVX), 1) | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL | |||
| endif | |||
| endif | |||
| ifeq ($(ARCH), x86_64) | |||
| DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
| ifneq ($(NO_AVX), 1) | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL | |||
| endif | |||
| endif | |||
| @@ -63,6 +63,8 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/); | |||
| $architecture = alpha if ($data =~ /ARCH_ALPHA/); | |||
| $architecture = sparc if ($data =~ /ARCH_SPARC/); | |||
| $architecture = ia64 if ($data =~ /ARCH_IA64/); | |||
| $architecture = arm if ($data =~ /ARCH_ARM/); | |||
| $architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
| $defined = 0; | |||
| @@ -149,6 +151,8 @@ $architecture = mips64 if ($data =~ /ARCH_MIPS64/); | |||
| $architecture = alpha if ($data =~ /ARCH_ALPHA/); | |||
| $architecture = sparc if ($data =~ /ARCH_SPARC/); | |||
| $architecture = ia64 if ($data =~ /ARCH_IA64/); | |||
| $architecture = arm if ($data =~ /ARCH_ARM/); | |||
| $architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
| $binformat = bin32; | |||
| $binformat = bin64 if ($data =~ /BINARY_64/); | |||
| @@ -107,7 +107,7 @@ | |||
| #define CORE_BOBCAT 21 | |||
| #define CORE_BULLDOZER 22 | |||
| #define CORE_PILEDRIVER 23 | |||
| #define CORE_HASWELL CORE_SANDYBRIDGE | |||
| #define CORE_HASWELL 24 | |||
| #define HAVE_SSE (1 << 0) | |||
| #define HAVE_SSE2 (1 << 1) | |||
| @@ -200,7 +200,6 @@ typedef struct { | |||
| #define CPUTYPE_BOBCAT 45 | |||
| #define CPUTYPE_BULLDOZER 46 | |||
| #define CPUTYPE_PILEDRIVER 47 | |||
| // this define is because BLAS doesn't have haswell specific optimizations yet | |||
| #define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE | |||
| #define CPUTYPE_HASWELL 48 | |||
| #endif | |||
| @@ -1243,6 +1243,7 @@ static char *cpuname[] = { | |||
| "BOBCAT", | |||
| "BULLDOZER", | |||
| "PILEDRIVER", | |||
| "HASWELL", | |||
| }; | |||
| static char *lowercpuname[] = { | |||
| @@ -1293,6 +1294,7 @@ static char *lowercpuname[] = { | |||
| "bobcat", | |||
| "bulldozer", | |||
| "piledriver", | |||
| "haswell", | |||
| }; | |||
| static char *corename[] = { | |||
| @@ -1320,6 +1322,7 @@ static char *corename[] = { | |||
| "BOBCAT", | |||
| "BULLDOZER", | |||
| "PILEDRIVER", | |||
| "HASWELL", | |||
| }; | |||
| static char *corename_lower[] = { | |||
| @@ -1347,6 +1350,7 @@ static char *corename_lower[] = { | |||
| "bobcat", | |||
| "bulldozer", | |||
| "piledriver", | |||
| "haswell", | |||
| }; | |||
| @@ -65,14 +65,15 @@ extern gotoblas_t gotoblas_BOBCAT; | |||
| extern gotoblas_t gotoblas_SANDYBRIDGE; | |||
| extern gotoblas_t gotoblas_BULLDOZER; | |||
| extern gotoblas_t gotoblas_PILEDRIVER; | |||
| extern gotoblas_t gotoblas_HASWELL; | |||
| #else | |||
| //Use NEHALEM kernels for sandy bridge | |||
| #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | |||
| #define gotoblas_HASWELL gotoblas_NEHALEM | |||
| #define gotoblas_BULLDOZER gotoblas_BARCELONA | |||
| #define gotoblas_PILEDRIVER gotoblas_BARCELONA | |||
| #endif | |||
| //Use sandy bridge kernels for haswell. | |||
| #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | |||
| #define VENDOR_INTEL 1 | |||
| #define VENDOR_AMD 2 | |||
| @@ -297,6 +298,7 @@ static char *corename[] = { | |||
| "Bobcat", | |||
| "Bulldozer", | |||
| "Piledriver", | |||
| "Haswell", | |||
| }; | |||
| char *gotoblas_corename(void) { | |||
| @@ -319,7 +321,8 @@ char *gotoblas_corename(void) { | |||
| if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16]; | |||
| if (gotoblas == &gotoblas_BOBCAT) return corename[17]; | |||
| if (gotoblas == &gotoblas_BULLDOZER) return corename[18]; | |||
| if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; | |||
| if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; | |||
| if (gotoblas == &gotoblas_HASWELL) return corename[20]; | |||
| return corename[0]; | |||
| } | |||
| @@ -298,6 +298,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "SANDYBRIDGE" | |||
| #endif | |||
| #ifdef FORCE_HASWELL | |||
| #define FORCE | |||
| #define FORCE_INTEL | |||
| #define ARCHITECTURE "X86" | |||
| #define SUBARCHITECTURE "HASWELL" | |||
| #define ARCHCONFIG "-DHASWELL " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ | |||
| "-DFMA3" | |||
| #define LIBNAME "haswell" | |||
| #define CORENAME "HASWELL" | |||
| #endif | |||
| #ifdef FORCE_ATOM | |||
| #define FORCE | |||
| #define FORCE_INTEL | |||
| @@ -679,6 +694,52 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "generic" | |||
| #endif | |||
| #ifdef FORCE_ARMV7 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM" | |||
| #define SUBARCHITECTURE "ARMV7" | |||
| #define SUBDIRNAME "arm" | |||
| #define ARCHCONFIG "-DARMV7 " \ | |||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||
| "-DHAVE_VFPV3 -DHAVE_VFP" | |||
| #define LIBNAME "armv7" | |||
| #define CORENAME "ARMV7" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_ARMV6 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM" | |||
| #define SUBARCHITECTURE "ARMV6" | |||
| #define SUBDIRNAME "arm" | |||
| #define ARCHCONFIG "-DARMV6 " \ | |||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||
| "-DHAVE_VFP" | |||
| #define LIBNAME "armv6" | |||
| #define CORENAME "ARMV6" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_ARMV8 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "ARMV8" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DARMV8 " \ | |||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||
| "-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4" | |||
| #define LIBNAME "armv8" | |||
| #define CORENAME "ARMV8" | |||
| #else | |||
| #endif | |||
| #ifndef FORCE | |||
| #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | |||
| @@ -719,6 +780,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifdef __arm__ | |||
| #include "cpuid_arm.c" | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifndef OPENBLAS_SUPPORTED | |||
| #error "This arch/CPU is not supported by OpenBLAS." | |||
| #endif | |||
| @@ -773,7 +840,7 @@ int main(int argc, char *argv[]){ | |||
| #ifdef FORCE | |||
| printf("CORE=%s\n", CORENAME); | |||
| #else | |||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) | |||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) | |||
| printf("CORE=%s\n", get_corename()); | |||
| #endif | |||
| #endif | |||
| @@ -788,6 +855,11 @@ int main(int argc, char *argv[]){ | |||
| printf("NUM_CORES=%d\n", get_num_cores()); | |||
| #if defined(__arm__) && !defined(FORCE) | |||
| get_features(); | |||
| #endif | |||
| #if defined(__i386__) || defined(__x86_64__) | |||
| #ifndef FORCE | |||
| get_sse(); | |||
| @@ -14,6 +14,20 @@ ifeq ($(ARCH), MIPS) | |||
| USE_GEMM3M = 1 | |||
| endif | |||
| ifeq ($(ARCH), arm) | |||
| USE_TRMM = 1 | |||
| endif | |||
| ifeq ($(ARCH), arm64) | |||
| USE_TRMM = 1 | |||
| endif | |||
| ifeq ($(TARGET), LOONGSON3B) | |||
| USE_TRMM = 1 | |||
| endif | |||
| SKERNELOBJS += \ | |||
| sgemm_kernel$(TSUFFIX).$(SUFFIX) \ | |||
| $(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \ | |||
| @@ -498,7 +512,8 @@ $(KDIR)xgemm_kernel_r$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMD | |||
| $(KDIR)xgemm_kernel_b$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) $(XGEMMDEPEND) | |||
| $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $@ | |||
| ifeq ($(TARGET), LOONGSON3B) | |||
| ifdef USE_TRMM | |||
| $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ | |||
| @@ -582,24 +597,6 @@ $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ | |||
| else | |||
| ifdef STRMMKERNEL | |||
| $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ | |||
| $(KDIR)strmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ | |||
| $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ | |||
| $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ | |||
| else | |||
| $(KDIR)strmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ | |||
| @@ -613,93 +610,17 @@ $(KDIR)strmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) | |||
| $(KDIR)strmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ | |||
| endif | |||
| ifdef DTRMMKERNEL | |||
| ifdef DTRMMKERNEL_LN | |||
| $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_LN) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ | |||
| else | |||
| $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ | |||
| endif | |||
| ifdef DTRMMKERNEL_LT | |||
| $(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_LT) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ | |||
| else | |||
| $(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ | |||
| endif | |||
| ifdef DTRMMKERNEL_RN | |||
| $(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_RN) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ | |||
| else | |||
| $(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ | |||
| endif | |||
| ifdef DTRMMKERNEL_RT | |||
| $(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL_RT) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ | |||
| else | |||
| $(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ | |||
| endif | |||
| else | |||
| ifdef DTRMMKERNEL_LN | |||
| $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_LN) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ | |||
| else | |||
| $(KDIR)dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ | |||
| endif | |||
| ifdef DTRMMKERNEL_LT | |||
| $(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_LT) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ | |||
| else | |||
| $(KDIR)dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ | |||
| endif | |||
| ifdef DTRMMKERNEL_RN | |||
| $(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_RN) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ | |||
| else | |||
| $(KDIR)dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ | |||
| endif | |||
| ifdef DTRMMKERNEL_RT | |||
| $(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL_RT) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ | |||
| else | |||
| $(KDIR)dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ | |||
| endif | |||
| endif | |||
| ifdef QTRMMKERNEL | |||
| $(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ | |||
| $(KDIR)qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -DTRANSA $< -o $@ | |||
| $(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -UTRANSA $< -o $@ | |||
| $(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ | |||
| else | |||
| $(KDIR)qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -DLEFT -UTRANSA $< -o $@ | |||
| @@ -713,36 +634,6 @@ $(KDIR)qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) | |||
| $(KDIR)qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(QGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -UCOMPLEX -ULEFT -DTRANSA $< -o $@ | |||
| endif | |||
| ifdef CTRMMKERNEL | |||
| $(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ | |||
| $(KDIR)ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ | |||
| $(KDIR)ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ | |||
| $(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ | |||
| else | |||
| $(KDIR)ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ | |||
| @@ -767,37 +658,6 @@ $(KDIR)ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) | |||
| $(KDIR)ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ | |||
| endif | |||
| ifdef ZTRMMKERNEL | |||
| $(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ | |||
| $(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ | |||
| $(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ | |||
| $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ | |||
| else | |||
| $(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ | |||
| @@ -821,37 +681,10 @@ $(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) | |||
| $(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ | |||
| endif | |||
| endif | |||
| ifdef XTRMMKERNEL | |||
| $(KDIR)xtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)xtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)xtrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@ | |||
| $(KDIR)xtrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@ | |||
| $(KDIR)xtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)xtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@ | |||
| $(KDIR)xtrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@ | |||
| $(KDIR)xtrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ | |||
| else | |||
| $(KDIR)xtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@ | |||
| @@ -877,9 +710,6 @@ $(KDIR)xtrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) | |||
| $(KDIR)xtrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMMKERNEL) | |||
| $(CC) $(CFLAGS) -c -DTRMMKERNEL -DXDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@ | |||
| endif | |||
| $(KDIR)cgemm3m_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEMM3MKERNEL) | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DNN $< -o $@ | |||
| @@ -0,0 +1 @@ | |||
| include $(KERNELDIR)/KERNEL.PENRYN | |||
| @@ -62,7 +62,7 @@ | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| @@ -62,7 +62,7 @@ | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| @@ -62,7 +62,7 @@ | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| @@ -62,7 +62,7 @@ | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| @@ -62,7 +62,7 @@ | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| @@ -62,7 +62,7 @@ | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHSIZE (8 * 21 + 4) | |||
| #endif | |||
| @@ -61,7 +61,7 @@ | |||
| #define PREFETCHSIZE 84 | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht1 | |||
| #define PREFETCHSIZE 84 | |||
| #endif | |||
| @@ -63,7 +63,7 @@ | |||
| #define PREFETCHSIZE 84 | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht1 | |||
| #define PREFETCHSIZE 84 | |||
| #endif | |||
| @@ -61,7 +61,7 @@ | |||
| #define PREFETCHSIZE 84 | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht1 | |||
| #define PREFETCHSIZE 84 | |||
| #endif | |||
| @@ -63,7 +63,7 @@ | |||
| #define PREFETCHSIZE 84 | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht1 | |||
| #define PREFETCHSIZE 84 | |||
| #endif | |||
| @@ -61,7 +61,7 @@ | |||
| #define PREFETCHSIZE 84 | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht1 | |||
| #define PREFETCHSIZE 84 | |||
| #endif | |||
| @@ -0,0 +1,63 @@ | |||
| SGEMMKERNEL = sgemm_kernel_16x4_haswell.S | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_16.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = dgemm_kernel_4x4_haswell.S | |||
| DGEMMINCOPY = | |||
| DGEMMITCOPY = | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| DGEMMINCOPYOBJ = | |||
| DGEMMITCOPYOBJ = | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = cgemm_kernel_8x2_haswell.S | |||
| CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||
| CGEMMITCOPY = ../generic/zgemm_tcopy_8.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S | |||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c | |||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CGEMM3MKERNEL = zgemm3m_kernel_4x8_nehalem.S | |||
| ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S | |||
| @@ -57,7 +57,7 @@ | |||
| #define PREFETCHSIZE (16 * 12) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHW prefetcht0 | |||
| #define PREFETCHSIZE (16 * 12) | |||
| @@ -57,7 +57,7 @@ | |||
| #define PREFETCHSIZE (16 * 12) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHW prefetcht0 | |||
| #define PREFETCHSIZE (16 * 12) | |||
| @@ -57,7 +57,7 @@ | |||
| #define PREFETCHSIZE (16 * 12) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHW prefetcht0 | |||
| #define PREFETCHSIZE (16 * 12) | |||
| @@ -57,7 +57,7 @@ | |||
| #define PREFETCHSIZE (16 * 12) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHW prefetcht0 | |||
| #define PREFETCHSIZE (16 * 24) | |||
| @@ -57,7 +57,7 @@ | |||
| #define PREFETCHSIZE (16 * 24) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHW prefetcht0 | |||
| #define PREFETCHSIZE (16 * 24) | |||
| @@ -57,7 +57,7 @@ | |||
| #define PREFETCHSIZE (16 * 24) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHW prefetcht0 | |||
| #define PREFETCHSIZE (16 * 24) | |||
| @@ -57,7 +57,7 @@ | |||
| #define PREFETCHSIZE (16 * 24) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHW prefetcht0 | |||
| #define PREFETCHSIZE (16 * 24) | |||
| @@ -57,7 +57,7 @@ | |||
| #define PREFETCHSIZE (16 * 24) | |||
| #endif | |||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHW prefetcht0 | |||
| #define PREFETCHSIZE (16 * 24) | |||
| @@ -1154,6 +1154,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #ifdef HASWELL | |||
| <<<<<<< HEAD | |||
| #define SNUMOPT 8 | |||
| #define DNUMOPT 4 | |||
| @@ -1164,6 +1165,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SYMV_P 8 | |||
| #define SWITCH_RATIO 4 | |||
| ======= | |||
| #define SNUMOPT 8 | |||
| #define DNUMOPT 4 | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #define SYMV_P 8 | |||
| #define SWITCH_RATIO 4 | |||
| >>>>>>> origin/haswell | |||
| #ifdef ARCH_X86 | |||
| @@ -1233,6 +1246,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define ZGEMM_DEFAULT_Q 128 | |||
| #define SGEMM_DEFAULT_R sgemm_r | |||
| <<<<<<< HEAD | |||
| ======= | |||
| //#define DGEMM_DEFAULT_R dgemm_r | |||
| >>>>>>> origin/haswell | |||
| #define DGEMM_DEFAULT_R 13824 | |||
| #define CGEMM_DEFAULT_R cgemm_r | |||
| #define ZGEMM_DEFAULT_R zgemm_r | |||