| @@ -277,14 +277,14 @@ ifeq ($(ARCH), x86) | |||
| DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | |||
| CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
| ifneq ($(NO_AVX), 1) | |||
| DYNAMIC_CORE += SANDYBRIDGE | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER | |||
| endif | |||
| endif | |||
| ifeq ($(ARCH), x86_64) | |||
| DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
| ifneq ($(NO_AVX), 1) | |||
| DYNAMIC_CORE += SANDYBRIDGE | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER | |||
| endif | |||
| endif | |||
| @@ -29,6 +29,7 @@ BARCELONA | |||
| SHANGHAI | |||
| ISTANBUL | |||
| BOBCAT | |||
| BULLDOZER | |||
| c)VIA CPU: | |||
| SSE_GENERIC | |||
| @@ -125,7 +125,8 @@ | |||
| #define HAVE_MISALIGNSSE (1 << 15) | |||
| #define HAVE_128BITFPU (1 << 16) | |||
| #define HAVE_FASTMOVU (1 << 17) | |||
| #define HAVE_AVX (1 << 18) | |||
| #define HAVE_AVX (1 << 18) | |||
| #define HAVE_FMA4 (1 << 19) | |||
| #define CACHE_INFO_L1_I 1 | |||
| #define CACHE_INFO_L1_D 2 | |||
| @@ -43,6 +43,8 @@ | |||
| #ifdef NO_AVX | |||
| #define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM | |||
| #define CORE_SANDYBRIDGE CORE_NEHALEM | |||
| #define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA | |||
| #define CORE_BULLDOZER CORE_BARCELONA | |||
| #endif | |||
| #ifndef CPUIDEMU | |||
| @@ -229,6 +231,9 @@ int get_cputype(int gettype){ | |||
| cpuid(0x80000001, &eax, &ebx, &ecx, &edx); | |||
| if ((ecx & (1 << 6)) != 0) feature |= HAVE_SSE4A; | |||
| if ((ecx & (1 << 7)) != 0) feature |= HAVE_MISALIGNSSE; | |||
| #ifndef NO_AVX | |||
| if ((ecx & (1 << 16)) != 0) feature |= HAVE_FMA4; | |||
| #endif | |||
| if ((edx & (1 << 30)) != 0) feature |= HAVE_3DNOWEX; | |||
| if ((edx & (1 << 31)) != 0) feature |= HAVE_3DNOW; | |||
| } | |||
| @@ -1078,8 +1083,12 @@ int get_cpuname(void){ | |||
| return CPUTYPE_OPTERON; | |||
| case 1: | |||
| case 10: | |||
| case 6: //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series | |||
| return CPUTYPE_BARCELONA; | |||
| case 6: //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series | |||
| if(support_avx()) | |||
| return CPUTYPE_BULLDOZER; | |||
| else | |||
| return CPUTYPE_BARCELONA; //OS doesn't support AVX; report Barcelona instead. | |||
| case 5: | |||
| return CPUTYPE_BOBCAT; | |||
| } | |||
| @@ -1432,8 +1441,13 @@ int get_coretype(void){ | |||
| if (family == 0xf){ | |||
| if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON; | |||
| else if (exfamily == 5) return CORE_BOBCAT; | |||
| else if (exfamily == 6) return CORE_BARCELONA; //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series | |||
| else return CORE_BARCELONA; | |||
| else if (exfamily == 6) { | |||
| //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series | |||
| if(support_avx()) | |||
| return CORE_BULLDOZER; | |||
| else | |||
| return CORE_BARCELONA; //OS doesn't support AVX, so fall back to the older Barcelona kernels. | |||
| }else return CORE_BARCELONA; | |||
| } | |||
| } | |||
| @@ -1519,6 +1533,7 @@ void get_cpuconfig(void){ | |||
| if (features & HAVE_AVX ) printf("#define HAVE_AVX\n"); | |||
| if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n"); | |||
| if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n"); | |||
| if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n"); | |||
| if (features & HAVE_CFLUSH) printf("#define HAVE_CFLUSH\n"); | |||
| if (features & HAVE_HIT) printf("#define HAVE_HIT 1\n"); | |||
| if (features & HAVE_MISALIGNSSE) printf("#define HAVE_MISALIGNSSE\n"); | |||
| @@ -1585,5 +1600,6 @@ void get_sse(void){ | |||
| if (features & HAVE_AVX ) printf("HAVE_AVX=1\n"); | |||
| if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n"); | |||
| if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n"); | |||
| if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n"); | |||
| } | |||
| @@ -63,9 +63,11 @@ extern gotoblas_t gotoblas_BARCELONA; | |||
| extern gotoblas_t gotoblas_BOBCAT; | |||
| #ifndef NO_AVX | |||
| extern gotoblas_t gotoblas_SANDYBRIDGE; | |||
| extern gotoblas_t gotoblas_BULLDOZER; | |||
| #else | |||
| //Use NEHALEM kernels for sandy bridge | |||
| #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | |||
| #define gotoblas_BULLDOZER gotoblas_BARCELONA | |||
| #endif | |||
| @@ -204,6 +206,14 @@ static gotoblas_t *get_coretype(void){ | |||
| else return &gotoblas_OPTERON; | |||
| } else if (exfamily == 5) { | |||
| return &gotoblas_BOBCAT; | |||
| } else if (exfamily == 6) { | |||
| //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series | |||
| if(support_avx()) | |||
| return &gotoblas_BULLDOZER; | |||
| else{ | |||
| fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Barcelona kernels.\n"); | |||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| } else { | |||
| return &gotoblas_BARCELONA; | |||
| } | |||
| @@ -240,6 +250,7 @@ static char *corename[] = { | |||
| "Nano", | |||
| "Sandybridge", | |||
| "Bobcat", | |||
| "Bulldozer", | |||
| }; | |||
| char *gotoblas_corename(void) { | |||
| @@ -261,6 +272,7 @@ char *gotoblas_corename(void) { | |||
| if (gotoblas == &gotoblas_NANO) return corename[15]; | |||
| if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16]; | |||
| if (gotoblas == &gotoblas_BOBCAT) return corename[17]; | |||
| if (gotoblas == &gotoblas_BULLDOZER) return corename[18]; | |||
| return corename[0]; | |||
| } | |||
| @@ -163,7 +163,7 @@ int get_L2_size(void){ | |||
| int eax, ebx, ecx, edx; | |||
| #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || \ | |||
| #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | |||
| defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | |||
| defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) | |||
| @@ -350,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "OPTERON" | |||
| #endif | |||
| #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_BULLDOZER) | |||
| #if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) | |||
| #define FORCE | |||
| #define FORCE_INTEL | |||
| #define ARCHITECTURE "X86" | |||
| @@ -380,6 +380,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "BOBCAT" | |||
| #endif | |||
/*
 * Forced-target configuration for BULLDOZER.
 * When FORCE_BULLDOZER is defined this block sets FORCE and FORCE_INTEL,
 * names the architecture X86 / BULLDOZER, and supplies the -D flag string
 * (ARCHCONFIG) carrying the cache sizes, TLB settings and HAVE_* feature
 * flags, including HAVE_AVX and HAVE_FMA4.
 *
 * NOTE(review): L2_SIZE=1024000 is not a power of two (1 MiB is 1048576);
 * confirm the intended value against the vendor documentation.
 * NOTE(review): FORCE_INTEL is defined here although this is an AMD core;
 * the neighbouring Barcelona block does the same, so this is presumably
 * deliberate. Confirm what the macro controls before changing it.
 */
| #if defined (FORCE_BULLDOZER) | |||
| #define FORCE | |||
| #define FORCE_INTEL | |||
| #define ARCHITECTURE "X86" | |||
| #define SUBARCHITECTURE "BULLDOZER" | |||
| #define ARCHCONFIG "-DBULLDOZER " \ | |||
| "-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=1024000 -DL2_LINESIZE=64 -DL3_SIZE=16777216 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ | |||
| "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \ | |||
| "-DHAVE_AVX -DHAVE_FMA4" | |||
| #define LIBNAME "bulldozer" | |||
| #define CORENAME "BULLDOZER" | |||
| #endif | |||
| #ifdef FORCE_SSE_GENERIC | |||
| #define FORCE | |||
| #define FORCE_INTEL | |||
| @@ -810,6 +810,22 @@ static void init_parameter(void) { | |||
| #endif | |||
| #endif | |||
/*
 * BULLDOZER section of init_parameter.
 * Copies the compile-time SGEMM/DGEMM/CGEMM/ZGEMM _DEFAULT_P values into
 * the matching *gemm_p fields of TABLE_NAME. The QGEMM and XGEMM fields
 * are filled only when EXPRECISION is defined. With DEBUG defined, the
 * core name is also written to stderr.
 */
| #ifdef BULLDOZER | |||
| #ifdef DEBUG | |||
| fprintf(stderr, "Bulldozer\n"); | |||
| #endif | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| #endif | |||
| #endif | |||
| #ifdef NANO | |||
| #ifdef DEBUG | |||
| @@ -0,0 +1,59 @@ | |||
# Kernel source selection for the BULLDOZER target.
# Every entry below names a Barcelona-tagged assembly file, a ../generic
# C copy routine, or an SSE/SSE2 TRSM assembly file; no Bulldozer-specific
# source file is referenced in this list.
# SGEMM and CGEMM leave their INCOPY/ITCOPY entries (and the matching OBJ
# entries) empty, while DGEMM and ZGEMM set all four copy routines.
| SGEMMKERNEL = gemm_kernel_4x4_barcelona.S | |||
| SGEMMINCOPY = | |||
| SGEMMITCOPY = | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| SGEMMINCOPYOBJ = | |||
| SGEMMITCOPYOBJ = | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = gemm_kernel_2x4_barcelona.S | |||
| DGEMMINCOPY = ../generic/gemm_ncopy_2.c | |||
| DGEMMITCOPY = ../generic/gemm_tcopy_2.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = zgemm_kernel_2x2_barcelona.S | |||
| CGEMMINCOPY = | |||
| CGEMMITCOPY = | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMINCOPYOBJ = | |||
| CGEMMITCOPYOBJ = | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = zgemm_kernel_1x2_barcelona.S | |||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c | |||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# TRSM kernels: SSE sources for single precision, SSE2 for double.
# NOTE(review): every *TRSMKERNEL_RN entry points at the LT source file,
# and ZTRSMKERNEL_LN also points at the LT source rather than an LN file.
# Presumably the source is shared on purpose; confirm before editing.
| STRSMKERNEL_LN = trsm_kernel_LN_4x4_sse.S | |||
| STRSMKERNEL_LT = trsm_kernel_LT_4x4_sse.S | |||
| STRSMKERNEL_RN = trsm_kernel_LT_4x4_sse.S | |||
| STRSMKERNEL_RT = trsm_kernel_RT_4x4_sse.S | |||
| DTRSMKERNEL_LN = trsm_kernel_LN_2x4_sse2.S | |||
| DTRSMKERNEL_LT = trsm_kernel_LT_2x4_sse2.S | |||
| DTRSMKERNEL_RN = trsm_kernel_LT_2x4_sse2.S | |||
| DTRSMKERNEL_RT = trsm_kernel_RT_2x4_sse2.S | |||
| CTRSMKERNEL_LN = ztrsm_kernel_LN_2x2_sse.S | |||
| CTRSMKERNEL_LT = ztrsm_kernel_LT_2x2_sse.S | |||
| CTRSMKERNEL_RN = ztrsm_kernel_LT_2x2_sse.S | |||
| CTRSMKERNEL_RT = ztrsm_kernel_RT_2x2_sse.S | |||
| ZTRSMKERNEL_LN = ztrsm_kernel_LT_1x2_sse2.S | |||
| ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x2_sse2.S | |||
| ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x2_sse2.S | |||
| ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x2_sse2.S | |||
# 3M complex GEMM kernels, again taken from the Barcelona-tagged sources.
| CGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S | |||
| ZGEMM3MKERNEL = zgemm3m_kernel_2x4_barcelona.S | |||
| @@ -596,7 +596,7 @@ | |||
| .L22: | |||
| mulps %xmm0, %xmm2 | |||
| addps %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movsd 4 * SIZE(BB), %xmm2 | |||
| @@ -842,7 +842,7 @@ | |||
| .L32: | |||
| mulss %xmm0, %xmm2 | |||
| addss %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movss 4 * SIZE(BB), %xmm2 | |||
| @@ -1168,7 +1168,7 @@ | |||
| .L52: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulps 4 * SIZE(BB), %xmm0 | |||
| @@ -1198,7 +1198,7 @@ | |||
| addps %xmm0, %xmm5 | |||
| movaps 32 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| mulps %xmm1, %xmm2 | |||
| @@ -1347,7 +1347,7 @@ | |||
| ALIGN_4 | |||
| .L62: | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| @@ -1531,7 +1531,7 @@ | |||
| .L72: | |||
| mulss %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulss 4 * SIZE(BB), %xmm0 | |||
| @@ -1778,7 +1778,7 @@ | |||
| .L92: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movaps 4 * SIZE(AA), %xmm0 | |||
| @@ -1793,7 +1793,7 @@ | |||
| mulps 12 * SIZE(BB), %xmm0 | |||
| addps %xmm0, %xmm7 | |||
| movaps 32 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| mulps %xmm1, %xmm3 | |||
| @@ -1924,7 +1924,7 @@ | |||
| .L102: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movsd 2 * SIZE(AA), %xmm0 | |||
| @@ -2069,7 +2069,7 @@ | |||
| .L112: | |||
| mulss %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movss 1 * SIZE(AA), %xmm0 | |||
| @@ -269,7 +269,7 @@ | |||
| sarl $5, I | |||
| jle .L113 | |||
| #if defined(BARCELONA) | |||
| #if defined(BARCELONA) || defined(BULLDOZER) | |||
| movaps %xmm0, %xmm1 | |||
| mulps -32 * SIZE(X), %xmm1 | |||
| @@ -253,7 +253,7 @@ | |||
| sarl $4, I | |||
| jle .L113 | |||
| #if defined(BARCELONA) | |||
| #if defined(BARCELONA) || defined(BULLDOZER) | |||
| movaps %xmm0, %xmm1 | |||
| mulpd -16 * SIZE(X), %xmm1 | |||
| @@ -69,7 +69,7 @@ | |||
| #define STACK_ALIGN 4096 | |||
| #define STACK_OFFSET 1024 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHSIZE (8 * 10 + 4) | |||
| #endif | |||
| @@ -439,7 +439,7 @@ | |||
| .L22: | |||
| mulsd %xmm0, %xmm2 | |||
| addsd %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movlpd 2 * SIZE(BB), %xmm2 | |||
| @@ -488,7 +488,7 @@ | |||
| movlpd 40 * SIZE(BB), %xmm3 | |||
| addsd %xmm0, %xmm7 | |||
| movlpd 8 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) | |||
| #endif | |||
| mulsd %xmm1, %xmm2 | |||
| @@ -1697,7 +1697,7 @@ | |||
| .L42: | |||
| mulpd %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulpd 2 * SIZE(BB), %xmm0 | |||
| @@ -1727,7 +1727,7 @@ | |||
| addpd %xmm0, %xmm7 | |||
| movapd 16 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) | |||
| #endif | |||
| mulpd %xmm1, %xmm2 | |||
| @@ -64,7 +64,7 @@ | |||
| #define BORIG 60(%esp) | |||
| #define BUFFER 128(%esp) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 10 + 8) | |||
| @@ -437,7 +437,7 @@ | |||
| .L32: | |||
| mulss %xmm0, %xmm2 | |||
| addss %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movss 4 * SIZE(BB), %xmm2 | |||
| @@ -833,7 +833,7 @@ | |||
| .L22: | |||
| mulps %xmm0, %xmm2 | |||
| addps %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movaps 4 * SIZE(BB), %xmm2 | |||
| @@ -1848,7 +1848,7 @@ | |||
| .L72: | |||
| mulss %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulss 4 * SIZE(BB), %xmm0 | |||
| @@ -2109,7 +2109,7 @@ | |||
| ALIGN_4 | |||
| .L62: | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| @@ -2429,7 +2429,7 @@ | |||
| .L52: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulps 4 * SIZE(BB), %xmm0 | |||
| @@ -2459,7 +2459,7 @@ | |||
| addps %xmm0, %xmm5 | |||
| movaps 32 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| mulps %xmm1, %xmm2 | |||
| @@ -2952,7 +2952,7 @@ | |||
| .L112: | |||
| mulss %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movss 1 * SIZE(AA), %xmm0 | |||
| @@ -3148,7 +3148,7 @@ | |||
| .L102: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movsd 2 * SIZE(AA), %xmm0 | |||
| @@ -3389,7 +3389,7 @@ | |||
| .L92: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movaps 4 * SIZE(AA), %xmm0 | |||
| @@ -3404,7 +3404,7 @@ | |||
| mulps 12 * SIZE(BB), %xmm0 | |||
| addps %xmm0, %xmm7 | |||
| movaps 32 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| mulps %xmm1, %xmm3 | |||
| @@ -69,7 +69,7 @@ | |||
| #define STACK_ALIGN 4096 | |||
| #define STACK_OFFSET 1024 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHSIZE (8 * 10 + 4) | |||
| #endif | |||
| @@ -910,7 +910,7 @@ | |||
| .L22: | |||
| mulsd %xmm0, %xmm2 | |||
| addsd %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movlpd 2 * SIZE(BB), %xmm2 | |||
| @@ -959,7 +959,7 @@ | |||
| movlpd 40 * SIZE(BB), %xmm3 | |||
| addsd %xmm0, %xmm7 | |||
| movlpd 8 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) | |||
| #endif | |||
| mulsd %xmm1, %xmm2 | |||
| @@ -1439,7 +1439,7 @@ | |||
| .L42: | |||
| mulpd %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulpd 2 * SIZE(BB), %xmm0 | |||
| @@ -1469,7 +1469,7 @@ | |||
| addpd %xmm0, %xmm7 | |||
| movapd 16 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) | |||
| #endif | |||
| mulpd %xmm1, %xmm2 | |||
| @@ -64,7 +64,7 @@ | |||
| #define BORIG 60(%esp) | |||
| #define BUFFER 128(%esp) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 10 + 8) | |||
| @@ -872,7 +872,7 @@ | |||
| .L22: | |||
| mulps %xmm0, %xmm2 | |||
| addps %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movaps 4 * SIZE(BB), %xmm2 | |||
| @@ -1316,7 +1316,7 @@ | |||
| .L32: | |||
| mulss %xmm0, %xmm2 | |||
| addss %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movss 4 * SIZE(BB), %xmm2 | |||
| @@ -1855,7 +1855,7 @@ | |||
| .L52: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulps 4 * SIZE(BB), %xmm0 | |||
| @@ -1885,7 +1885,7 @@ | |||
| addps %xmm0, %xmm5 | |||
| movaps 32 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| mulps %xmm1, %xmm2 | |||
| @@ -2249,7 +2249,7 @@ | |||
| ALIGN_4 | |||
| .L62: | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| @@ -2562,7 +2562,7 @@ | |||
| .L72: | |||
| mulss %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulss 4 * SIZE(BB), %xmm0 | |||
| @@ -2957,7 +2957,7 @@ | |||
| .L92: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movaps 4 * SIZE(AA), %xmm0 | |||
| @@ -2972,7 +2972,7 @@ | |||
| mulps 12 * SIZE(BB), %xmm0 | |||
| addps %xmm0, %xmm7 | |||
| movaps 32 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| mulps %xmm1, %xmm3 | |||
| @@ -3280,7 +3280,7 @@ | |||
| .L102: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movsd 2 * SIZE(AA), %xmm0 | |||
| @@ -3515,7 +3515,7 @@ | |||
| .L112: | |||
| mulss %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movss 1 * SIZE(AA), %xmm0 | |||
| @@ -69,7 +69,7 @@ | |||
| #define STACK_ALIGN 4096 | |||
| #define STACK_OFFSET 1024 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHSIZE (8 * 10 + 4) | |||
| #endif | |||
| @@ -1036,7 +1036,7 @@ | |||
| .L42: | |||
| mulpd %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulpd 2 * SIZE(BB), %xmm0 | |||
| @@ -1066,7 +1066,7 @@ | |||
| addpd %xmm0, %xmm7 | |||
| movapd 16 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) | |||
| #endif | |||
| mulpd %xmm1, %xmm2 | |||
| @@ -2224,7 +2224,7 @@ | |||
| .L22: | |||
| mulsd %xmm0, %xmm2 | |||
| addsd %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movlpd 2 * SIZE(BB), %xmm2 | |||
| @@ -2273,7 +2273,7 @@ | |||
| movlpd 40 * SIZE(BB), %xmm3 | |||
| addsd %xmm0, %xmm7 | |||
| movlpd 8 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) | |||
| #endif | |||
| mulsd %xmm1, %xmm2 | |||
| @@ -64,7 +64,7 @@ | |||
| #define BORIG 60(%esp) | |||
| #define BUFFER 128(%esp) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 10 + 8) | |||
| @@ -439,7 +439,7 @@ | |||
| .L92: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movaps 4 * SIZE(AA), %xmm0 | |||
| @@ -454,7 +454,7 @@ | |||
| mulps 12 * SIZE(BB), %xmm0 | |||
| addps %xmm0, %xmm7 | |||
| movaps 32 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| mulps %xmm1, %xmm3 | |||
| @@ -758,7 +758,7 @@ | |||
| .L102: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movsd 2 * SIZE(AA), %xmm0 | |||
| @@ -993,7 +993,7 @@ | |||
| .L112: | |||
| mulss %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movss 1 * SIZE(AA), %xmm0 | |||
| @@ -1324,7 +1324,7 @@ | |||
| .L52: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulps 4 * SIZE(BB), %xmm0 | |||
| @@ -1354,7 +1354,7 @@ | |||
| addps %xmm0, %xmm5 | |||
| movaps 32 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| mulps %xmm1, %xmm2 | |||
| @@ -1718,7 +1718,7 @@ | |||
| ALIGN_4 | |||
| .L62: | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| @@ -2031,7 +2031,7 @@ | |||
| .L72: | |||
| mulss %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulss 4 * SIZE(BB), %xmm0 | |||
| @@ -2859,7 +2859,7 @@ | |||
| .L22: | |||
| mulps %xmm0, %xmm2 | |||
| addps %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movaps 4 * SIZE(BB), %xmm2 | |||
| @@ -3303,7 +3303,7 @@ | |||
| .L32: | |||
| mulss %xmm0, %xmm2 | |||
| addss %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movss 4 * SIZE(BB), %xmm2 | |||
| @@ -74,7 +74,7 @@ | |||
| #define BB %ecx | |||
| #define LDC %ebp | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| #define movsd movlps | |||
| #endif | |||
| @@ -625,7 +625,7 @@ | |||
| .L22: | |||
| mulps %xmm0, %xmm2 | |||
| addps %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movsd 4 * SIZE(BB), %xmm2 | |||
| @@ -870,7 +870,7 @@ | |||
| .L32: | |||
| mulss %xmm0, %xmm2 | |||
| addss %xmm2, %xmm4 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movss 4 * SIZE(BB), %xmm2 | |||
| @@ -1173,7 +1173,7 @@ | |||
| .L52: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulps 4 * SIZE(BB), %xmm0 | |||
| @@ -1203,7 +1203,7 @@ | |||
| addps %xmm0, %xmm5 | |||
| movaps 32 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| mulps %xmm1, %xmm2 | |||
| @@ -1359,7 +1359,7 @@ | |||
| ALIGN_4 | |||
| .L62: | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| @@ -1536,7 +1536,7 @@ | |||
| .L72: | |||
| mulss %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| mulss 4 * SIZE(BB), %xmm0 | |||
| @@ -1794,7 +1794,7 @@ | |||
| .L92: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movaps 4 * SIZE(AA), %xmm0 | |||
| @@ -1809,7 +1809,7 @@ | |||
| mulps 12 * SIZE(BB), %xmm0 | |||
| addps %xmm0, %xmm7 | |||
| movaps 32 * SIZE(AA), %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| mulps %xmm1, %xmm3 | |||
| @@ -1936,7 +1936,7 @@ | |||
| .L102: | |||
| mulps %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movsd 2 * SIZE(AA), %xmm0 | |||
| @@ -2069,7 +2069,7 @@ | |||
| .L112: | |||
| mulss %xmm0, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) | |||
| #endif | |||
| movss 1 * SIZE(AA), %xmm0 | |||
| @@ -71,7 +71,7 @@ | |||
| #define movsd movlps | |||
| #endif | |||
| #ifdef BARCELONA | |||
| #if defined(BARCELONA) || defined(BULLDOZER) | |||
| #define PREFETCH prefetchnta | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 5) | |||
| @@ -58,7 +58,7 @@ | |||
| #define movsd movlps | |||
| #endif | |||
| #ifdef BARCELONA | |||
| #if defined(BARCELONA) || defined(BULLDOZER) | |||
| #define PREFETCH prefetchnta | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (8 * 5) | |||
| @@ -71,7 +71,7 @@ | |||
| #define movsd movlps | |||
| #endif | |||
| #ifdef BARCELONA | |||
| #if defined(BARCELONA) || defined(BULLDOZER) | |||
| #define PREFETCH prefetchnta | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 5) | |||
| @@ -58,7 +58,7 @@ | |||
| #define movsd movlps | |||
| #endif | |||
| #ifdef BARCELONA | |||
| #if defined(BARCELONA) || defined(BULLDOZER) | |||
| #define PREFETCH prefetchnta | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (8 * 5) | |||
| @@ -75,7 +75,7 @@ | |||
| #define STACK_ALIGN 4096 | |||
| #define STACK_OFFSET 1024 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCHSIZE (16 * 10 + 8) | |||
| #define WPREFETCHSIZE 112 | |||
| #define PREFETCH prefetch | |||
| @@ -533,7 +533,7 @@ | |||
| addps %xmm0, %xmm7 | |||
| movsd 16 * SIZE(AA), %xmm0 | |||
| mulps %xmm1, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| addps %xmm2, %xmm4 | |||
| @@ -75,7 +75,7 @@ | |||
| #define STACK_ALIGN 4096 | |||
| #define STACK_OFFSET 1024 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCHSIZE (16 * 10 + 8) | |||
| #define WPREFETCHSIZE 112 | |||
| #define PREFETCH prefetch | |||
| @@ -994,7 +994,7 @@ | |||
| addps %xmm0, %xmm7 | |||
| movsd 16 * SIZE(AA), %xmm0 | |||
| mulps %xmm1, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| addps %xmm2, %xmm4 | |||
| @@ -75,7 +75,7 @@ | |||
| #define STACK_ALIGN 4096 | |||
| #define STACK_OFFSET 1024 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCHSIZE (16 * 10 + 8) | |||
| #define WPREFETCHSIZE 112 | |||
| #define PREFETCH prefetch | |||
| @@ -1820,7 +1820,7 @@ | |||
| addps %xmm0, %xmm7 | |||
| movsd 16 * SIZE(AA), %xmm0 | |||
| mulps %xmm1, %xmm2 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) | |||
| prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) | |||
| #endif | |||
| addps %xmm2, %xmm4 | |||
| @@ -0,0 +1,62 @@ | |||
| ZGEMVNKERNEL = zgemv_n_dup.S | |||
| ZGEMVTKERNEL = zgemv_t_dup.S | |||
| SGEMMKERNEL = gemm_kernel_8x4_barcelona.S | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
| SGEMMONCOPY = gemm_ncopy_4_opteron.S | |||
| SGEMMOTCOPY = gemm_tcopy_4_opteron.S | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = dgemm_kernel_4x4_bulldozer.S | |||
| DGEMMINCOPY = | |||
| DGEMMITCOPY = | |||
| DGEMMONCOPY = gemm_ncopy_4_opteron.S | |||
| DGEMMOTCOPY = gemm_tcopy_4_opteron.S | |||
| DGEMMINCOPYOBJ = | |||
| DGEMMITCOPYOBJ = | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = zgemm_kernel_4x2_barcelona.S | |||
| CGEMMINCOPY = ../generic/zgemm_ncopy_4.c | |||
| CGEMMITCOPY = ../generic/zgemm_tcopy_4.c | |||
| CGEMMONCOPY = zgemm_ncopy_2.S | |||
| CGEMMOTCOPY = zgemm_tcopy_2.S | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = zgemm_kernel_2x2_barcelona.S | |||
| ZGEMMINCOPY = | |||
| ZGEMMITCOPY = | |||
| ZGEMMONCOPY = zgemm_ncopy_2.S | |||
| ZGEMMOTCOPY = zgemm_tcopy_2.S | |||
| ZGEMMINCOPYOBJ = | |||
| ZGEMMITCOPYOBJ = | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRSMKERNEL_LN = trsm_kernel_LN_8x4_sse.S | |||
| STRSMKERNEL_LT = trsm_kernel_LT_8x4_sse.S | |||
| STRSMKERNEL_RN = trsm_kernel_LT_8x4_sse.S | |||
| STRSMKERNEL_RT = trsm_kernel_RT_8x4_sse.S | |||
| DTRSMKERNEL_LN = trsm_kernel_LN_4x4_barcelona.S | |||
| DTRSMKERNEL_LT = trsm_kernel_LT_4x4_barcelona.S | |||
| DTRSMKERNEL_RN = trsm_kernel_LT_4x4_barcelona.S | |||
| DTRSMKERNEL_RT = trsm_kernel_RT_4x4_barcelona.S | |||
| CTRSMKERNEL_LN = ztrsm_kernel_LN_4x2_sse.S | |||
| CTRSMKERNEL_LT = ztrsm_kernel_LT_4x2_sse.S | |||
| CTRSMKERNEL_RN = ztrsm_kernel_LT_4x2_sse.S | |||
| CTRSMKERNEL_RT = ztrsm_kernel_RT_4x2_sse.S | |||
| ZTRSMKERNEL_LN = ztrsm_kernel_LN_2x2_sse2.S | |||
| ZTRSMKERNEL_LT = ztrsm_kernel_LT_2x2_sse2.S | |||
| ZTRSMKERNEL_RN = ztrsm_kernel_LT_2x2_sse2.S | |||
| ZTRSMKERNEL_RT = ztrsm_kernel_RT_2x2_sse2.S | |||
| CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S | |||
| ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S | |||
| @@ -930,7 +930,7 @@ | |||
| .L22: | |||
| mulps %xmm8, %xmm9 | |||
| addps %xmm9, %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| movaps 4 * SIZE(BO), %xmm9 | |||
| @@ -983,7 +983,7 @@ | |||
| addps %xmm8, %xmm3 | |||
| movaps 0 * SIZE(AO), %xmm8 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | |||
| #endif | |||
| mulps %xmm10, %xmm9 | |||
| @@ -1178,7 +1178,7 @@ | |||
| .L32: | |||
| mulps %xmm8, %xmm9 | |||
| addps %xmm9, %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| movsd 4 * SIZE(BO), %xmm9 | |||
| @@ -1423,7 +1423,7 @@ | |||
| .L42: | |||
| mulss %xmm8, %xmm9 | |||
| addss %xmm9, %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| movss 4 * SIZE(BO), %xmm9 | |||
| @@ -1765,7 +1765,7 @@ | |||
| .L62: | |||
| mulps %xmm8, %xmm9 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| mulps 4 * SIZE(BO), %xmm8 | |||
| @@ -1793,7 +1793,7 @@ | |||
| addps %xmm8, %xmm5 | |||
| movaps 32 * SIZE(AO), %xmm8 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | |||
| #endif | |||
| mulps %xmm10, %xmm11 | |||
| @@ -1822,7 +1822,7 @@ | |||
| addps %xmm10, %xmm5 | |||
| movaps 48 * SIZE(AO), %xmm10 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 32) * SIZE(AO) | |||
| #endif | |||
| mulps %xmm12, %xmm13 | |||
| @@ -1851,7 +1851,7 @@ | |||
| addps %xmm12, %xmm5 | |||
| movaps 64 * SIZE(AO), %xmm12 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 48) * SIZE(AO) | |||
| #endif | |||
| mulps %xmm14, %xmm15 | |||
| @@ -2024,7 +2024,7 @@ | |||
| .L72: | |||
| mulps %xmm8, %xmm9 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| @@ -2208,7 +2208,7 @@ | |||
| .L82: | |||
| mulps %xmm8, %xmm9 | |||
| addps %xmm9, %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| movsd 4 * SIZE(BO), %xmm9 | |||
| @@ -2395,7 +2395,7 @@ | |||
| .L92: | |||
| mulps %xmm8, %xmm9 | |||
| addps %xmm9, %xmm0 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| movss 4 * SIZE(BO), %xmm9 | |||
| @@ -2670,7 +2670,7 @@ | |||
| .L112: | |||
| mulps %xmm9, %xmm8 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| @@ -2687,7 +2687,7 @@ | |||
| addps %xmm9, %xmm4 | |||
| movaps 8 * SIZE(BO), %xmm9 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | |||
| #endif | |||
| mulps %xmm9, %xmm10 | |||
| @@ -2704,7 +2704,7 @@ | |||
| addps %xmm9, %xmm4 | |||
| movaps 32 * SIZE(BO), %xmm9 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 32) * SIZE(AO) | |||
| #endif | |||
| mulps %xmm11, %xmm12 | |||
| @@ -2721,7 +2721,7 @@ | |||
| addps %xmm11, %xmm4 | |||
| movaps 24 * SIZE(BO), %xmm11 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 48) * SIZE(AO) | |||
| #endif | |||
| mulps %xmm11, %xmm14 | |||
| @@ -2857,7 +2857,7 @@ | |||
| .L122: | |||
| mulps %xmm8, %xmm9 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| movaps -28 * SIZE(AO), %xmm8 | |||
| @@ -2873,7 +2873,7 @@ | |||
| addps %xmm8, %xmm3 | |||
| movaps 0 * SIZE(AO), %xmm8 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) | |||
| #endif | |||
| mulps %xmm10, %xmm11 | |||
| @@ -3003,7 +3003,7 @@ | |||
| .L132: | |||
| mulps %xmm8, %xmm9 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| movsd -30 * SIZE(AO), %xmm8 | |||
| @@ -3150,7 +3150,7 @@ | |||
| .L142: | |||
| mulss %xmm8, %xmm9 | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) | |||
| #endif | |||
| movss -31 * SIZE(AO), %xmm8 | |||
| @@ -39,7 +39,7 @@ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #if defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| #define RPREFETCHSIZE (12 + 4) | |||
| #define WPREFETCHSIZE (48 + 4) | |||
| #define MOVNTQ MOVQ | |||
| @@ -79,7 +79,7 @@ | |||
| #define AO3 %r13 | |||
| #define AO4 %rax | |||
| #if defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| #define RPREFETCH prefetch | |||
| #else | |||
| #define RPREFETCH prefetch | |||
| @@ -39,7 +39,7 @@ | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| #if defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| #define RPREFETCHSIZE (12 + 4) | |||
| #define WPREFETCHSIZE (12 + 4) | |||
| #define MOVNTQ MOVQ | |||
| @@ -96,7 +96,7 @@ | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| #define RPREFETCH prefetch | |||
| #else | |||
| #define RPREFETCH prefetch | |||
| @@ -469,7 +469,7 @@ | |||
| ALIGN_4 | |||
| .L71: | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| prefetch PREFETCHSIZE * SIZE(X) | |||
| #endif | |||
| @@ -266,7 +266,7 @@ | |||
| sarq $5, I | |||
| jle .L113 | |||
| #if defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| movaps %xmm0, %xmm1 | |||
| mulps -32 * SIZE(X), %xmm1 | |||
| @@ -251,7 +251,7 @@ | |||
| sarq $4, I | |||
| jle .L113 | |||
| #if defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| movaps %xmm0, %xmm1 | |||
| mulpd -16 * SIZE(X), %xmm1 | |||
| @@ -76,7 +76,7 @@ | |||
| #define movsd movlps | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 16) | |||
| @@ -76,7 +76,7 @@ | |||
| #define movsd movlpd | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 16) | |||
| @@ -76,7 +76,7 @@ | |||
| #define movsd movlps | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 16) | |||
| @@ -76,7 +76,7 @@ | |||
| #define movsd movlpd | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 16) | |||
| @@ -86,7 +86,7 @@ | |||
| #define PREFETCHW prefetcht0 | |||
| #endif | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define movsd movlps | |||
| @@ -86,7 +86,7 @@ | |||
| #define PREFETCHW prefetcht0 | |||
| #endif | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define movsd movlps | |||
| @@ -86,7 +86,7 @@ | |||
| #define PREFETCHW prefetcht0 | |||
| #endif | |||
| #if defined(OPTERON) || defined(BARCELONA) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define movsd movlps | |||
| @@ -85,7 +85,7 @@ | |||
| #define movsd movlpd | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| #define RPREFETCHSIZE 32 | |||
| #define WPREFETCHSIZE 48 | |||
| #endif | |||
| @@ -160,7 +160,7 @@ | |||
| #define a3 %xmm14 | |||
| #define xt1 %xmm15 | |||
| #if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define MOVDDUP(a, b, c) movddup a(b), c | |||
| #define MOVDDUP2(a, b, c) movddup a##b, c | |||
| #else | |||
| @@ -76,7 +76,7 @@ | |||
| #define movsd movlpd | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 16) | |||
| @@ -167,7 +167,7 @@ | |||
| #define a3 %xmm14 | |||
| #define xt1 %xmm15 | |||
| #if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| #define MOVDDUP(a, b, c) movddup a(b), c | |||
| #define MOVDDUP2(a, b, c) movddup a##b, c | |||
| #else | |||
| @@ -76,7 +76,7 @@ | |||
| #define movsd movlpd | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 16) | |||
| @@ -166,7 +166,7 @@ | |||
| #define xt1 %xmm14 | |||
| #define xt2 %xmm15 | |||
| #if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| #define MOVDDUP(a, b, c) movddup a(b), c | |||
| #define MOVDDUP2(a, b, c) movddup a##b, c | |||
| #else | |||
| @@ -76,7 +76,7 @@ | |||
| #define movsd movlpd | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (16 * 16) | |||
| @@ -166,7 +166,7 @@ | |||
| #define a3 %xmm14 | |||
| #define xt1 %xmm15 | |||
| #if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) | |||
| #if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) | |||
| #define MOVDDUP(a, b, c) movddup a(b), c | |||
| #define MOVDDUP2(a, b, c) movddup a##b, c | |||
| #else | |||
| @@ -86,7 +86,7 @@ | |||
| #define BORIG 72(%rsp) | |||
| #define BUFFER 128(%rsp) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHNTA prefetchnta | |||
| @@ -95,7 +95,7 @@ | |||
| #define PREFETCHSIZE (8 * 6 + 4) | |||
| #endif | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHNTA prefetchnta | |||
| @@ -86,7 +86,7 @@ | |||
| #define BORIG 72(%rsp) | |||
| #define BUFFER 128(%rsp) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHNTA prefetchnta | |||
| @@ -95,7 +95,7 @@ | |||
| #define PREFETCHSIZE (8 * 6 + 4) | |||
| #endif | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHNTA prefetchnta | |||
| @@ -86,7 +86,7 @@ | |||
| #define BORIG 72(%rsp) | |||
| #define BUFFER 128(%rsp) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHNTA prefetchnta | |||
| @@ -95,7 +95,7 @@ | |||
| #define PREFETCHSIZE (8 * 6 + 4) | |||
| #endif | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHNTA prefetchnta | |||
| @@ -74,6 +74,13 @@ | |||
| #define ALIGNED_ACCESS | |||
| #endif | |||
| #ifdef BULLDOZER | |||
| #define PREFETCH prefetch | |||
| #define PREFETCHW prefetchw | |||
| #define PREFETCHSIZE (128 * 5) | |||
| #define ALIGNED_ACCESS | |||
| #endif | |||
| #ifdef NANO | |||
| #define PREFETCH prefetcht0 | |||
| #define PREFETCHW prefetcht0 | |||
| @@ -85,7 +85,7 @@ | |||
| #define movsd movlps | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define ALIGNED_ACCESS | |||
| #define MOVUPS_A movaps | |||
| #define MOVUPS_XL movaps | |||
| @@ -143,7 +143,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #endif | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) | |||
| #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) | |||
| #define SNUMOPT 8 | |||
| #define DNUMOPT 4 | |||