| @@ -336,14 +336,14 @@ ifeq ($(ARCH), x86) | |||||
| DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | ||||
| CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | ||||
| ifneq ($(NO_AVX), 1) | ifneq ($(NO_AVX), 1) | ||||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(ARCH), x86_64) | ifeq ($(ARCH), x86_64) | ||||
| DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | ||||
| ifneq ($(NO_AVX), 1) | ifneq ($(NO_AVX), 1) | ||||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL | |||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -107,7 +107,7 @@ | |||||
| #define CORE_BOBCAT 21 | #define CORE_BOBCAT 21 | ||||
| #define CORE_BULLDOZER 22 | #define CORE_BULLDOZER 22 | ||||
| #define CORE_PILEDRIVER 23 | #define CORE_PILEDRIVER 23 | ||||
| #define CORE_HASWELL CORE_SANDYBRIDGE | |||||
| #define CORE_HASWELL 24 | |||||
| #define HAVE_SSE (1 << 0) | #define HAVE_SSE (1 << 0) | ||||
| #define HAVE_SSE2 (1 << 1) | #define HAVE_SSE2 (1 << 1) | ||||
| @@ -200,7 +200,6 @@ typedef struct { | |||||
| #define CPUTYPE_BOBCAT 45 | #define CPUTYPE_BOBCAT 45 | ||||
| #define CPUTYPE_BULLDOZER 46 | #define CPUTYPE_BULLDOZER 46 | ||||
| #define CPUTYPE_PILEDRIVER 47 | #define CPUTYPE_PILEDRIVER 47 | ||||
| // this define is because BLAS doesn't have haswell specific optimizations yet | |||||
| #define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE | |||||
| #define CPUTYPE_HASWELL 48 | |||||
| #endif | #endif | ||||
| @@ -1243,6 +1243,7 @@ static char *cpuname[] = { | |||||
| "BOBCAT", | "BOBCAT", | ||||
| "BULLDOZER", | "BULLDOZER", | ||||
| "PILEDRIVER", | "PILEDRIVER", | ||||
| "HASWELL", | |||||
| }; | }; | ||||
| static char *lowercpuname[] = { | static char *lowercpuname[] = { | ||||
| @@ -1293,6 +1294,7 @@ static char *lowercpuname[] = { | |||||
| "bobcat", | "bobcat", | ||||
| "bulldozer", | "bulldozer", | ||||
| "piledriver", | "piledriver", | ||||
| "haswell", | |||||
| }; | }; | ||||
| static char *corename[] = { | static char *corename[] = { | ||||
| @@ -1320,6 +1322,7 @@ static char *corename[] = { | |||||
| "BOBCAT", | "BOBCAT", | ||||
| "BULLDOZER", | "BULLDOZER", | ||||
| "PILEDRIVER", | "PILEDRIVER", | ||||
| "HASWELL", | |||||
| }; | }; | ||||
| static char *corename_lower[] = { | static char *corename_lower[] = { | ||||
| @@ -1347,6 +1350,7 @@ static char *corename_lower[] = { | |||||
| "bobcat", | "bobcat", | ||||
| "bulldozer", | "bulldozer", | ||||
| "piledriver", | "piledriver", | ||||
| "haswell", | |||||
| }; | }; | ||||
| @@ -65,14 +65,15 @@ extern gotoblas_t gotoblas_BOBCAT; | |||||
| extern gotoblas_t gotoblas_SANDYBRIDGE; | extern gotoblas_t gotoblas_SANDYBRIDGE; | ||||
| extern gotoblas_t gotoblas_BULLDOZER; | extern gotoblas_t gotoblas_BULLDOZER; | ||||
| extern gotoblas_t gotoblas_PILEDRIVER; | extern gotoblas_t gotoblas_PILEDRIVER; | ||||
| extern gotoblas_t gotoblas_HASWELL; | |||||
| #else | #else | ||||
| //Use NEHALEM kernels for sandy bridge | //Use NEHALEM kernels for sandy bridge | ||||
| #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | ||||
| #define gotoblas_HASWELL gotoblas_NEHALEM | |||||
| #define gotoblas_BULLDOZER gotoblas_BARCELONA | #define gotoblas_BULLDOZER gotoblas_BARCELONA | ||||
| #define gotoblas_PILEDRIVER gotoblas_BARCELONA | #define gotoblas_PILEDRIVER gotoblas_BARCELONA | ||||
| #endif | #endif | ||||
| //Use sandy bridge kernels for haswell. | |||||
| #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | |||||
| #define VENDOR_INTEL 1 | #define VENDOR_INTEL 1 | ||||
| #define VENDOR_AMD 2 | #define VENDOR_AMD 2 | ||||
| @@ -297,6 +298,7 @@ static char *corename[] = { | |||||
| "Bobcat", | "Bobcat", | ||||
| "Bulldozer", | "Bulldozer", | ||||
| "Piledriver", | "Piledriver", | ||||
| "Haswell", | |||||
| }; | }; | ||||
| char *gotoblas_corename(void) { | char *gotoblas_corename(void) { | ||||
| @@ -319,7 +321,8 @@ char *gotoblas_corename(void) { | |||||
| if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16]; | if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16]; | ||||
| if (gotoblas == &gotoblas_BOBCAT) return corename[17]; | if (gotoblas == &gotoblas_BOBCAT) return corename[17]; | ||||
| if (gotoblas == &gotoblas_BULLDOZER) return corename[18]; | if (gotoblas == &gotoblas_BULLDOZER) return corename[18]; | ||||
| if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; | |||||
| if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; | |||||
| if (gotoblas == &gotoblas_HASWELL) return corename[20]; | |||||
| return corename[0]; | return corename[0]; | ||||
| } | } | ||||
| @@ -298,6 +298,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CORENAME "SANDYBRIDGE" | #define CORENAME "SANDYBRIDGE" | ||||
| #endif | #endif | ||||
| #ifdef FORCE_HASWELL | |||||
| #define FORCE | |||||
| #define FORCE_INTEL | |||||
| #define ARCHITECTURE "X86" | |||||
| #define SUBARCHITECTURE "HASWELL" | |||||
| #define ARCHCONFIG "-DHASWELL " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ | |||||
| "-DFMA3" | |||||
| #define LIBNAME "haswell" | |||||
| #define CORENAME "HASWELL" | |||||
| #endif | |||||
| #ifdef FORCE_ATOM | #ifdef FORCE_ATOM | ||||
| #define FORCE | #define FORCE | ||||
| #define FORCE_INTEL | #define FORCE_INTEL | ||||
| @@ -0,0 +1 @@ | |||||
| include $(KERNELDIR)/KERNEL.PENRYN | |||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -61,7 +61,7 @@ | |||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| @@ -63,7 +63,7 @@ | |||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| @@ -61,7 +61,7 @@ | |||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| @@ -63,7 +63,7 @@ | |||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| @@ -61,7 +61,7 @@ | |||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,63 @@ | |||||
| SGEMMKERNEL = sgemm_kernel_16x4_haswell.S | |||||
| SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||||
| SGEMMITCOPY = ../generic/gemm_tcopy_16.c | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = dgemm_kernel_4x4_haswell.S | |||||
| DGEMMINCOPY = | |||||
| DGEMMITCOPY = | |||||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| DGEMMINCOPYOBJ = | |||||
| DGEMMITCOPYOBJ = | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = cgemm_kernel_8x2_haswell.S | |||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_8.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S | |||||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c | |||||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CGEMM3MKERNEL = zgemm3m_kernel_4x8_nehalem.S | |||||
| ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S | |||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
| #if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -1154,6 +1154,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifdef HASWELL | #ifdef HASWELL | ||||
| <<<<<<< HEAD | |||||
| #define SNUMOPT 8 | #define SNUMOPT 8 | ||||
| #define DNUMOPT 4 | #define DNUMOPT 4 | ||||
| @@ -1164,6 +1165,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define SYMV_P 8 | #define SYMV_P 8 | ||||
| #define SWITCH_RATIO 4 | #define SWITCH_RATIO 4 | ||||
| ======= | |||||
| #define SNUMOPT 8 | |||||
| #define DNUMOPT 4 | |||||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||||
| #define SYMV_P 8 | |||||
| #define SWITCH_RATIO 4 | |||||
| >>>>>>> origin/haswell | |||||
| #ifdef ARCH_X86 | #ifdef ARCH_X86 | ||||
| @@ -1233,6 +1246,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ZGEMM_DEFAULT_Q 128 | #define ZGEMM_DEFAULT_Q 128 | ||||
| #define SGEMM_DEFAULT_R sgemm_r | #define SGEMM_DEFAULT_R sgemm_r | ||||
| <<<<<<< HEAD | |||||
| ======= | |||||
| //#define DGEMM_DEFAULT_R dgemm_r | |||||
| >>>>>>> origin/haswell | |||||
| #define DGEMM_DEFAULT_R 13824 | #define DGEMM_DEFAULT_R 13824 | ||||
| #define CGEMM_DEFAULT_R cgemm_r | #define CGEMM_DEFAULT_R cgemm_r | ||||
| #define ZGEMM_DEFAULT_R zgemm_r | #define ZGEMM_DEFAULT_R zgemm_r | ||||