| @@ -68,6 +68,9 @@ endif | |||||
| ifeq ($(TARGET), EXCAVATOR) | ifeq ($(TARGET), EXCAVATOR) | ||||
| GETARCH_FLAGS := -DFORCE_BARCELONA | GETARCH_FLAGS := -DFORCE_BARCELONA | ||||
| endif | endif | ||||
| ifeq ($(TARGET), ZEN) | |||||
| GETARCH_FLAGS := -DFORCE_BARCELONA | |||||
| endif | |||||
| endif | endif | ||||
| @@ -98,6 +101,9 @@ endif | |||||
| ifeq ($(TARGET_CORE), EXCAVATOR) | ifeq ($(TARGET_CORE), EXCAVATOR) | ||||
| GETARCH_FLAGS := -DFORCE_BARCELONA | GETARCH_FLAGS := -DFORCE_BARCELONA | ||||
| endif | endif | ||||
| ifeq ($(TARGET_CORE), ZEN) | |||||
| GETARCH_FLAGS := -DFORCE_BARCELONA | |||||
| endif | |||||
| endif | endif | ||||
| @@ -443,7 +449,7 @@ ifneq ($(NO_AVX), 1) | |||||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR | DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR | ||||
| endif | endif | ||||
| ifneq ($(NO_AVX2), 1) | ifneq ($(NO_AVX2), 1) | ||||
| DYNAMIC_CORE += HASWELL | |||||
| DYNAMIC_CORE += HASWELL ZEN | |||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -34,6 +34,7 @@ BULLDOZER | |||||
| PILEDRIVER | PILEDRIVER | ||||
| STEAMROLLER | STEAMROLLER | ||||
| EXCAVATOR | EXCAVATOR | ||||
| ZEN | |||||
| c)VIA CPU: | c)VIA CPU: | ||||
| SSE_GENERIC | SSE_GENERIC | ||||
| @@ -73,7 +73,7 @@ if (DYNAMIC_ARCH) | |||||
| set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER") | set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER") | ||||
| endif () | endif () | ||||
| if (NOT NO_AVX2) | if (NOT NO_AVX2) | ||||
| set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL") | |||||
| set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL ZEN") | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| @@ -22,7 +22,7 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) | |||||
| if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE") | if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE") | ||||
| set(TARGET "NEHALEM") | set(TARGET "NEHALEM") | ||||
| endif () | endif () | ||||
| if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER") | |||||
| if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN") | |||||
| set(TARGET "BARCELONA") | set(TARGET "BARCELONA") | ||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| @@ -114,6 +114,7 @@ | |||||
| #define CORE_HASWELL 24 | #define CORE_HASWELL 24 | ||||
| #define CORE_STEAMROLLER 25 | #define CORE_STEAMROLLER 25 | ||||
| #define CORE_EXCAVATOR 26 | #define CORE_EXCAVATOR 26 | ||||
| #define CORE_ZEN 27 | |||||
| #define HAVE_SSE (1 << 0) | #define HAVE_SSE (1 << 0) | ||||
| #define HAVE_SSE2 (1 << 1) | #define HAVE_SSE2 (1 << 1) | ||||
| @@ -209,5 +210,6 @@ typedef struct { | |||||
| #define CPUTYPE_HASWELL 48 | #define CPUTYPE_HASWELL 48 | ||||
| #define CPUTYPE_STEAMROLLER 49 | #define CPUTYPE_STEAMROLLER 49 | ||||
| #define CPUTYPE_EXCAVATOR 50 | #define CPUTYPE_EXCAVATOR 50 | ||||
| #define CPUTYPE_ZEN 51 | |||||
| #endif | #endif | ||||
| @@ -1281,6 +1281,8 @@ int get_cpuname(void){ | |||||
| case 3: | case 3: | ||||
| case 10: | case 10: | ||||
| return CPUTYPE_BARCELONA; | return CPUTYPE_BARCELONA; | ||||
| case 5: | |||||
| return CPUTYPE_BOBCAT; | |||||
| case 6: | case 6: | ||||
| switch (model) { | switch (model) { | ||||
| case 1: | case 1: | ||||
| @@ -1295,8 +1297,8 @@ int get_cpuname(void){ | |||||
| return CPUTYPE_PILEDRIVER; | return CPUTYPE_PILEDRIVER; | ||||
| else | else | ||||
| return CPUTYPE_BARCELONA; //OS don't support AVX. | return CPUTYPE_BARCELONA; //OS don't support AVX. | ||||
| case 5: // New EXCAVATOR CPUS | |||||
| if(support_avx()) | |||||
| case 5: // New EXCAVATOR CPUS | |||||
| if(support_avx()) | |||||
| return CPUTYPE_EXCAVATOR; | return CPUTYPE_EXCAVATOR; | ||||
| else | else | ||||
| return CPUTYPE_BARCELONA; //OS don't support AVX. | return CPUTYPE_BARCELONA; //OS don't support AVX. | ||||
| @@ -1322,8 +1324,19 @@ int get_cpuname(void){ | |||||
| break; | break; | ||||
| } | } | ||||
| break; | break; | ||||
| case 5: | |||||
| return CPUTYPE_BOBCAT; | |||||
| case 8: | |||||
| switch (model) { | |||||
| case 1: | |||||
| // AMD Ryzen | |||||
| if(support_avx()) | |||||
| #ifndef NO_AVX2 | |||||
| return CPUTYPE_ZEN; | |||||
| #else | |||||
| return CPUTYPE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator | |||||
| #endif | |||||
| else | |||||
| return CPUTYPE_BARCELONA; | |||||
| } | |||||
| } | } | ||||
| break; | break; | ||||
| } | } | ||||
| @@ -1450,6 +1463,7 @@ static char *cpuname[] = { | |||||
| "HASWELL", | "HASWELL", | ||||
| "STEAMROLLER", | "STEAMROLLER", | ||||
| "EXCAVATOR", | "EXCAVATOR", | ||||
| "ZEN", | |||||
| }; | }; | ||||
| static char *lowercpuname[] = { | static char *lowercpuname[] = { | ||||
| @@ -1503,6 +1517,7 @@ static char *lowercpuname[] = { | |||||
| "haswell", | "haswell", | ||||
| "steamroller", | "steamroller", | ||||
| "excavator", | "excavator", | ||||
| "zen", | |||||
| }; | }; | ||||
| static char *corename[] = { | static char *corename[] = { | ||||
| @@ -1533,6 +1548,7 @@ static char *corename[] = { | |||||
| "HASWELL", | "HASWELL", | ||||
| "STEAMROLLER", | "STEAMROLLER", | ||||
| "EXCAVATOR", | "EXCAVATOR", | ||||
| "ZEN", | |||||
| }; | }; | ||||
| static char *corename_lower[] = { | static char *corename_lower[] = { | ||||
| @@ -1563,6 +1579,7 @@ static char *corename_lower[] = { | |||||
| "haswell", | "haswell", | ||||
| "steamroller", | "steamroller", | ||||
| "excavator", | "excavator", | ||||
| "zen", | |||||
| }; | }; | ||||
| @@ -1776,15 +1793,16 @@ int get_coretype(void){ | |||||
| break; | break; | ||||
| case 9: | case 9: | ||||
| case 8: | case 8: | ||||
| if (model == 14) // Kaby Lake | |||||
| if (model == 14) { // Kaby Lake | |||||
| if(support_avx()) | if(support_avx()) | ||||
| #ifndef NO_AVX2 | #ifndef NO_AVX2 | ||||
| return CORE_HASWELL; | |||||
| return CORE_HASWELL; | |||||
| #else | #else | ||||
| return CORE_SANDYBRIDGE; | |||||
| return CORE_SANDYBRIDGE; | |||||
| #endif | #endif | ||||
| else | else | ||||
| return CORE_NEHALEM; | return CORE_NEHALEM; | ||||
| } | |||||
| } | } | ||||
| break; | break; | ||||
| @@ -1841,9 +1859,22 @@ int get_coretype(void){ | |||||
| } | } | ||||
| break; | break; | ||||
| } | } | ||||
| }else return CORE_BARCELONA; | |||||
| } else if (exfamily == 8) { | |||||
| switch (model) { | |||||
| case 1: | |||||
| // AMD Ryzen | |||||
| if(support_avx()) | |||||
| #ifndef NO_AVX2 | |||||
| return CORE_ZEN; | |||||
| #else | |||||
| return CORE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator | |||||
| #endif | |||||
| else | |||||
| return CORE_BARCELONA; | |||||
| } | |||||
| } else { | |||||
| return CORE_BARCELONA; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -70,8 +70,10 @@ extern gotoblas_t gotoblas_STEAMROLLER; | |||||
| extern gotoblas_t gotoblas_EXCAVATOR; | extern gotoblas_t gotoblas_EXCAVATOR; | ||||
| #ifdef NO_AVX2 | #ifdef NO_AVX2 | ||||
| #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | ||||
| #define gotoblas_ZEN gotoblas_SANDYBRIDGE | |||||
| #else | #else | ||||
| extern gotoblas_t gotoblas_HASWELL; | extern gotoblas_t gotoblas_HASWELL; | ||||
| extern gotoblas_t gotoblas_ZEN; | |||||
| #endif | #endif | ||||
| #else | #else | ||||
| //Use NEHALEM kernels for sandy bridge | //Use NEHALEM kernels for sandy bridge | ||||
| @@ -81,6 +83,7 @@ extern gotoblas_t gotoblas_HASWELL; | |||||
| #define gotoblas_PILEDRIVER gotoblas_BARCELONA | #define gotoblas_PILEDRIVER gotoblas_BARCELONA | ||||
| #define gotoblas_STEAMROLLER gotoblas_BARCELONA | #define gotoblas_STEAMROLLER gotoblas_BARCELONA | ||||
| #define gotoblas_EXCAVATOR gotoblas_BARCELONA | #define gotoblas_EXCAVATOR gotoblas_BARCELONA | ||||
| #define gotoblas_ZEN gotoblas_BARCELONA | |||||
| #endif | #endif | ||||
| @@ -355,14 +358,14 @@ static gotoblas_t *get_coretype(void){ | |||||
| openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | ||||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | ||||
| } | } | ||||
| }else if(model == 5){ | |||||
| if(support_avx()) | |||||
| return &gotoblas_EXCAVATOR; | |||||
| else{ | |||||
| openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | |||||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||||
| } | |||||
| }else if(model == 0){ | |||||
| }else if(model == 5){ | |||||
| if(support_avx()) | |||||
| return &gotoblas_EXCAVATOR; | |||||
| else{ | |||||
| openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | |||||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||||
| } | |||||
| }else if(model == 0){ | |||||
| if (exmodel == 1) { | if (exmodel == 1) { | ||||
| //AMD Trinity | //AMD Trinity | ||||
| if(support_avx()) | if(support_avx()) | ||||
| @@ -389,9 +392,16 @@ static gotoblas_t *get_coretype(void){ | |||||
| } | } | ||||
| } | } | ||||
| } else { | |||||
| } else if (family == 8) { | |||||
| if (model == 1) { | |||||
| if(support_avx()) | |||||
| return &gotoblas_ZEN; | |||||
| else{ | |||||
| openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | |||||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||||
| } | |||||
| } | |||||
| }else { | |||||
| return &gotoblas_BARCELONA; | return &gotoblas_BARCELONA; | ||||
| } | } | ||||
| } | } | ||||
| @@ -431,6 +441,7 @@ static char *corename[] = { | |||||
| "Haswell", | "Haswell", | ||||
| "Steamroller", | "Steamroller", | ||||
| "Excavator", | "Excavator", | ||||
| "Zen" | |||||
| }; | }; | ||||
| char *gotoblas_corename(void) { | char *gotoblas_corename(void) { | ||||
| @@ -457,6 +468,7 @@ char *gotoblas_corename(void) { | |||||
| if (gotoblas == &gotoblas_HASWELL) return corename[20]; | if (gotoblas == &gotoblas_HASWELL) return corename[20]; | ||||
| if (gotoblas == &gotoblas_STEAMROLLER) return corename[21]; | if (gotoblas == &gotoblas_STEAMROLLER) return corename[21]; | ||||
| if (gotoblas == &gotoblas_EXCAVATOR) return corename[22]; | if (gotoblas == &gotoblas_EXCAVATOR) return corename[22]; | ||||
| if (gotoblas == &gotoblas_ZEN) return corename[23]; | |||||
| return corename[0]; | return corename[0]; | ||||
| } | } | ||||
| @@ -487,6 +499,7 @@ static gotoblas_t *force_coretype(char *coretype){ | |||||
| switch (found) | switch (found) | ||||
| { | { | ||||
| case 23: return (&gotoblas_ZEN); | |||||
| case 22: return (&gotoblas_EXCAVATOR); | case 22: return (&gotoblas_EXCAVATOR); | ||||
| case 21: return (&gotoblas_STEAMROLLER); | case 21: return (&gotoblas_STEAMROLLER); | ||||
| case 20: return (&gotoblas_HASWELL); | case 20: return (&gotoblas_HASWELL); | ||||
| @@ -167,7 +167,7 @@ int get_L2_size(void){ | |||||
| #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | ||||
| defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | ||||
| defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ | defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ | ||||
| defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||||
| defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) | |||||
| cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | ||||
| @@ -251,7 +251,7 @@ int get_L2_size(void){ | |||||
| void blas_set_parameter(void){ | void blas_set_parameter(void){ | ||||
| int factor; | int factor; | ||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) | |||||
| int size = 16; | int size = 16; | ||||
| #else | #else | ||||
| int size = get_L2_size(); | int size = get_L2_size(); | ||||
| @@ -473,6 +473,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CORENAME "EXCAVATOR" | #define CORENAME "EXCAVATOR" | ||||
| #endif | #endif | ||||
| #if defined (FORCE_ZEN) | |||||
| /* Forced (non-autodetected) build configuration for the ZEN target: selects the X86/ZEN sub-architecture and passes -DZEN plus fixed cache, TLB and ISA feature defines to the build. */ | |||||
| #define FORCE | |||||
| #define FORCE_INTEL | |||||
| #define ARCHITECTURE "X86" | |||||
| #define SUBARCHITECTURE "ZEN" | |||||
| #define ARCHCONFIG "-DZEN " \ | |||||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL2_CODE_ASSOCIATIVE=8 " \ | |||||
| "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||||
| "-DL3_SIZE=16777216 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=8 " \ | |||||
| "-DITB_DEFAULT_ENTRIES=64 -DITB_SIZE=4096 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \ | |||||
| "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \ | |||||
| "-DHAVE_AVX -DHAVE_FMA3 -DFMA3" | |||||
| /* Library/core names must identify ZEN, not EXCAVATOR, to agree with SUBARCHITECTURE and -DZEN above. NOTE(review): presumably CORENAME is what the build compares against "ZEN" (e.g. ifeq ($(CORE), ZEN)) - confirm against the consumer. */ | |||||
| #define LIBNAME "zen" | |||||
| #define CORENAME "ZEN" | |||||
| #endif | |||||
| #ifdef FORCE_SSE_GENERIC | #ifdef FORCE_SSE_GENERIC | ||||
| #define FORCE | #define FORCE | ||||
| @@ -118,7 +118,7 @@ endforeach () | |||||
| # Makefile.L3 | # Makefile.L3 | ||||
| set(USE_TRMM false) | set(USE_TRMM false) | ||||
| if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell") | |||||
| # Enable USE_TRMM for the listed architectures/cores. The zen clause must expand the CORE variable ("${CORE}"); a bare "{CORE}" is a literal string that can never equal "zen". | |||||
| if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen") | |||||
| set(USE_TRMM true) | set(USE_TRMM true) | ||||
| endif () | endif () | ||||
| @@ -32,6 +32,10 @@ ifeq ($(CORE), HASWELL) | |||||
| USE_TRMM = 1 | USE_TRMM = 1 | ||||
| endif | endif | ||||
| ifeq ($(CORE), ZEN) | |||||
| USE_TRMM = 1 | |||||
| endif | |||||
| ifeq ($(CORE), POWER8) | ifeq ($(CORE), POWER8) | ||||
| USE_TRMM = 1 | USE_TRMM = 1 | ||||
| endif | endif | ||||
| @@ -982,6 +982,22 @@ static void init_parameter(void) { | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #ifdef ZEN | |||||
| #ifdef DEBUG | |||||
| fprintf(stderr, "Zen\n"); | |||||
| #endif | |||||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||||
| #ifdef EXPRECISION | |||||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||||
| #endif | |||||
| #endif | |||||
| #ifdef NANO | #ifdef NANO | ||||
| @@ -0,0 +1 @@ | |||||
| include $(KERNELDIR)/KERNEL.BARCELONA | |||||
| @@ -0,0 +1,98 @@ | |||||
| DSCALKERNEL = dscal.c | |||||
| CSCALKERNEL = cscal.c | |||||
| ZSCALKERNEL = zscal.c | |||||
| SGEMVNKERNEL = sgemv_n_4.c | |||||
| SGEMVTKERNEL = sgemv_t_4.c | |||||
| DGEMVNKERNEL = dgemv_n_4.c | |||||
| DGEMVTKERNEL = dgemv_t_4.c | |||||
| ZGEMVNKERNEL = zgemv_n_4.c | |||||
| ZGEMVTKERNEL = zgemv_t_4.c | |||||
| CGEMVNKERNEL = cgemv_n_4.c | |||||
| CGEMVTKERNEL = cgemv_t_4.c | |||||
| SSYMV_L_KERNEL = ssymv_L.c | |||||
| SSYMV_U_KERNEL = ssymv_U.c | |||||
| DSYMV_L_KERNEL = dsymv_L.c | |||||
| DSYMV_U_KERNEL = dsymv_U.c | |||||
| SDOTKERNEL = sdot.c | |||||
| DDOTKERNEL = ddot.c | |||||
| CDOTKERNEL = cdot.c | |||||
| ZDOTKERNEL = zdot.c | |||||
| SAXPYKERNEL = saxpy.c | |||||
| DAXPYKERNEL = daxpy.c | |||||
| CAXPYKERNEL = caxpy.c | |||||
| ZAXPYKERNEL = zaxpy.c | |||||
| STRMMKERNEL = sgemm_kernel_16x4_haswell.S | |||||
| SGEMMKERNEL = sgemm_kernel_16x4_haswell.S | |||||
| SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||||
| SGEMMITCOPY = ../generic/gemm_tcopy_16.c | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DTRMMKERNEL = dtrmm_kernel_4x8_haswell.c | |||||
| DGEMMKERNEL = dgemm_kernel_4x8_haswell.S | |||||
| DGEMMINCOPY = ../generic/gemm_ncopy_4.c | |||||
| DGEMMITCOPY = ../generic/gemm_tcopy_4.c | |||||
| DGEMMONCOPY = ../generic/gemm_ncopy_8.c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_8.c | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CTRMMKERNEL = cgemm_kernel_8x2_haswell.S | |||||
| CGEMMKERNEL = cgemm_kernel_8x2_haswell.S | |||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_8.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZTRMMKERNEL = zgemm_kernel_4x2_haswell.S | |||||
| ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S | |||||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c | |||||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = dtrsm_kernel_RN_haswell.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CGEMM3MKERNEL = zgemm3m_kernel_4x8_nehalem.S | |||||
| ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S | |||||
| @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "caxpy_microk_steamroller-2.c" | #include "caxpy_microk_steamroller-2.c" | ||||
| #elif defined(BULLDOZER) | #elif defined(BULLDOZER) | ||||
| #include "caxpy_microk_bulldozer-2.c" | #include "caxpy_microk_bulldozer-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "caxpy_microk_haswell-2.c" | #include "caxpy_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "caxpy_microk_sandy-2.c" | #include "caxpy_microk_sandy-2.c" | ||||
| @@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "cdot_microk_bulldozer-2.c" | #include "cdot_microk_bulldozer-2.c" | ||||
| #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | ||||
| #include "cdot_microk_steamroller-2.c" | #include "cdot_microk_steamroller-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "cdot_microk_haswell-2.c" | #include "cdot_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "cdot_microk_sandy-2.c" | #include "cdot_microk_sandy-2.c" | ||||
| @@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) | |||||
| #if defined(HASWELL) || defined(ZEN) | |||||
| #include "cgemv_n_microk_haswell-4.c" | #include "cgemv_n_microk_haswell-4.c" | ||||
| #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "cgemv_n_microk_bulldozer-4.c" | #include "cgemv_n_microk_bulldozer-4.c" | ||||
| @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) | |||||
| #if defined(HASWELL) || defined(ZEN) | |||||
| #include "cgemv_t_microk_haswell-4.c" | #include "cgemv_t_microk_haswell-4.c" | ||||
| #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "cgemv_t_microk_bulldozer-4.c" | #include "cgemv_t_microk_bulldozer-4.c" | ||||
| @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) | |||||
| #if defined(HASWELL) || defined(ZEN) | |||||
| #include "cscal_microk_haswell-2.c" | #include "cscal_microk_haswell-2.c" | ||||
| #elif defined(BULLDOZER) || defined(PILEDRIVER) | #elif defined(BULLDOZER) || defined(PILEDRIVER) | ||||
| #include "cscal_microk_bulldozer-2.c" | #include "cscal_microk_bulldozer-2.c" | ||||
| @@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "daxpy_microk_steamroller-2.c" | #include "daxpy_microk_steamroller-2.c" | ||||
| #elif defined(PILEDRIVER) | #elif defined(PILEDRIVER) | ||||
| #include "daxpy_microk_piledriver-2.c" | #include "daxpy_microk_piledriver-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "daxpy_microk_haswell-2.c" | #include "daxpy_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "daxpy_microk_sandy-2.c" | #include "daxpy_microk_sandy-2.c" | ||||
| @@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "ddot_microk_piledriver-2.c" | #include "ddot_microk_piledriver-2.c" | ||||
| #elif defined(NEHALEM) | #elif defined(NEHALEM) | ||||
| #include "ddot_microk_nehalem-2.c" | #include "ddot_microk_nehalem-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "ddot_microk_haswell-2.c" | #include "ddot_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "ddot_microk_sandy-2.c" | #include "ddot_microk_sandy-2.c" | ||||
| @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(NEHALEM) | #if defined(NEHALEM) | ||||
| #include "dgemv_n_microk_nehalem-4.c" | #include "dgemv_n_microk_nehalem-4.c" | ||||
| #elif defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||||
| #include "dgemv_n_microk_haswell-4.c" | #include "dgemv_n_microk_haswell-4.c" | ||||
| #endif | #endif | ||||
| @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||||
| #if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||||
| #include "dgemv_t_microk_haswell-4.c" | #include "dgemv_t_microk_haswell-4.c" | ||||
| #endif | #endif | ||||
| @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "dscal_microk_bulldozer-2.c" | #include "dscal_microk_bulldozer-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "dscal_microk_sandy-2.c" | #include "dscal_microk_sandy-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "dscal_microk_haswell-2.c" | #include "dscal_microk_haswell-2.c" | ||||
| #endif | #endif | ||||
| @@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "dsymv_L_microk_bulldozer-2.c" | #include "dsymv_L_microk_bulldozer-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "dsymv_L_microk_haswell-2.c" | #include "dsymv_L_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "dsymv_L_microk_sandy-2.c" | #include "dsymv_L_microk_sandy-2.c" | ||||
| @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "dsymv_U_microk_bulldozer-2.c" | #include "dsymv_U_microk_bulldozer-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "dsymv_U_microk_haswell-2.c" | #include "dsymv_U_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "dsymv_U_microk_sandy-2.c" | #include "dsymv_U_microk_sandy-2.c" | ||||
| @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(NEHALEM) | #if defined(NEHALEM) | ||||
| #include "saxpy_microk_nehalem-2.c" | #include "saxpy_microk_nehalem-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "saxpy_microk_haswell-2.c" | #include "saxpy_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "saxpy_microk_sandy-2.c" | #include "saxpy_microk_sandy-2.c" | ||||
| @@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "sdot_microk_steamroller-2.c" | #include "sdot_microk_steamroller-2.c" | ||||
| #elif defined(NEHALEM) | #elif defined(NEHALEM) | ||||
| #include "sdot_microk_nehalem-2.c" | #include "sdot_microk_nehalem-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "sdot_microk_haswell-2.c" | #include "sdot_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "sdot_microk_sandy-2.c" | #include "sdot_microk_sandy-2.c" | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "sgemv_n_microk_nehalem-4.c" | #include "sgemv_n_microk_nehalem-4.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "sgemv_n_microk_sandy-4.c" | #include "sgemv_n_microk_sandy-4.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "sgemv_n_microk_haswell-4.c" | #include "sgemv_n_microk_haswell-4.c" | ||||
| #endif | #endif | ||||
| @@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "sgemv_t_microk_bulldozer-4.c" | #include "sgemv_t_microk_bulldozer-4.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "sgemv_t_microk_sandy-4.c" | #include "sgemv_t_microk_sandy-4.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "sgemv_t_microk_haswell-4.c" | #include "sgemv_t_microk_haswell-4.c" | ||||
| #endif | #endif | ||||
| @@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "ssymv_L_microk_bulldozer-2.c" | #include "ssymv_L_microk_bulldozer-2.c" | ||||
| #elif defined(NEHALEM) | #elif defined(NEHALEM) | ||||
| #include "ssymv_L_microk_nehalem-2.c" | #include "ssymv_L_microk_nehalem-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "ssymv_L_microk_haswell-2.c" | #include "ssymv_L_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "ssymv_L_microk_sandy-2.c" | #include "ssymv_L_microk_sandy-2.c" | ||||
| @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "ssymv_U_microk_bulldozer-2.c" | #include "ssymv_U_microk_bulldozer-2.c" | ||||
| #elif defined(NEHALEM) | #elif defined(NEHALEM) | ||||
| #include "ssymv_U_microk_nehalem-2.c" | #include "ssymv_U_microk_nehalem-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "ssymv_U_microk_haswell-2.c" | #include "ssymv_U_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "ssymv_U_microk_sandy-2.c" | #include "ssymv_U_microk_sandy-2.c" | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "zaxpy_microk_bulldozer-2.c" | #include "zaxpy_microk_bulldozer-2.c" | ||||
| #elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "zaxpy_microk_steamroller-2.c" | #include "zaxpy_microk_steamroller-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "zaxpy_microk_haswell-2.c" | #include "zaxpy_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "zaxpy_microk_sandy-2.c" | #include "zaxpy_microk_sandy-2.c" | ||||
| @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "zdot_microk_bulldozer-2.c" | #include "zdot_microk_bulldozer-2.c" | ||||
| #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | ||||
| #include "zdot_microk_steamroller-2.c" | #include "zdot_microk_steamroller-2.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "zdot_microk_haswell-2.c" | #include "zdot_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "zdot_microk_sandy-2.c" | #include "zdot_microk_sandy-2.c" | ||||
| @@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) | |||||
| #if defined(HASWELL) || defined(ZEN) | |||||
| #include "zgemv_n_microk_haswell-4.c" | #include "zgemv_n_microk_haswell-4.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "zgemv_n_microk_sandy-4.c" | #include "zgemv_n_microk_sandy-4.c" | ||||
| @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "zgemv_t_microk_bulldozer-4.c" | #include "zgemv_t_microk_bulldozer-4.c" | ||||
| #elif defined(HASWELL) | |||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #include "zgemv_t_microk_haswell-4.c" | #include "zgemv_t_microk_haswell-4.c" | ||||
| #endif | #endif | ||||
| @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) | |||||
| #if defined(HASWELL) || defined(ZEN) | |||||
| #include "zscal_microk_haswell-2.c" | #include "zscal_microk_haswell-2.c" | ||||
| #elif defined(BULLDOZER) || defined(PILEDRIVER) | #elif defined(BULLDOZER) || defined(PILEDRIVER) | ||||
| #include "zscal_microk_bulldozer-2.c" | #include "zscal_microk_bulldozer-2.c" | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -595,6 +595,96 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #ifdef ZEN | |||||
| #define SNUMOPT 8 | |||||
| #define DNUMOPT 4 | |||||
| #define GEMM_DEFAULT_OFFSET_A 64 | |||||
| #define GEMM_DEFAULT_OFFSET_B 832 | |||||
| #define GEMM_DEFAULT_ALIGN 0x0fffUL | |||||
| #define QGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define XGEMM_DEFAULT_UNROLL_N 1 | |||||
| #ifdef ARCH_X86 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define QGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 1 | |||||
| #define XGEMM_DEFAULT_UNROLL_M 1 | |||||
| #else | |||||
| #define SGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 8 | |||||
| #define QGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define XGEMM_DEFAULT_UNROLL_M 1 | |||||
| #define CGEMM3M_DEFAULT_UNROLL_N 4 | |||||
| #define CGEMM3M_DEFAULT_UNROLL_M 8 | |||||
| #define ZGEMM3M_DEFAULT_UNROLL_N 4 | |||||
| #define ZGEMM3M_DEFAULT_UNROLL_M 4 | |||||
| #define GEMV_UNROLL 8 | |||||
| #endif | |||||
| #if defined(ARCH_X86_64) | |||||
| #define SGEMM_DEFAULT_P 768 | |||||
| #define DGEMM_DEFAULT_P 576 | |||||
| #define ZGEMM_DEFAULT_P 288 | |||||
| #define CGEMM_DEFAULT_P 576 | |||||
| #else | |||||
| #define SGEMM_DEFAULT_P 448 | |||||
| #define DGEMM_DEFAULT_P 480 | |||||
| #define ZGEMM_DEFAULT_P 112 | |||||
| #define CGEMM_DEFAULT_P 224 | |||||
| #endif | |||||
| #define QGEMM_DEFAULT_P 112 | |||||
| #define XGEMM_DEFAULT_P 56 | |||||
| #if defined(ARCH_X86_64) | |||||
| #define SGEMM_DEFAULT_Q 192 | |||||
| #define DGEMM_DEFAULT_Q 160 | |||||
| #define ZGEMM_DEFAULT_Q 160 | |||||
| #define CGEMM_DEFAULT_Q 160 | |||||
| #else | |||||
| #define SGEMM_DEFAULT_Q 224 | |||||
| #define DGEMM_DEFAULT_Q 224 | |||||
| #define ZGEMM_DEFAULT_Q 224 | |||||
| #define CGEMM_DEFAULT_Q 224 | |||||
| #endif | |||||
| #define QGEMM_DEFAULT_Q 224 | |||||
| #define XGEMM_DEFAULT_Q 224 | |||||
| #define CGEMM3M_DEFAULT_P 448 | |||||
| #define ZGEMM3M_DEFAULT_P 224 | |||||
| #define XGEMM3M_DEFAULT_P 112 | |||||
| #define CGEMM3M_DEFAULT_Q 224 | |||||
| #define ZGEMM3M_DEFAULT_Q 224 | |||||
| #define XGEMM3M_DEFAULT_Q 224 | |||||
| #define CGEMM3M_DEFAULT_R 12288 | |||||
| #define ZGEMM3M_DEFAULT_R 12288 | |||||
| #define XGEMM3M_DEFAULT_R 12288 | |||||
| #define SGEMM_DEFAULT_R 12288 | |||||
| #define QGEMM_DEFAULT_R qgemm_r | |||||
| #define DGEMM_DEFAULT_R 12288 | |||||
| #define CGEMM_DEFAULT_R cgemm_r | |||||
| #define ZGEMM_DEFAULT_R zgemm_r | |||||
| #define XGEMM_DEFAULT_R xgemm_r | |||||
| #define SYMV_P 16 | |||||
| #define HAVE_EXCLUSIVE_CACHE | |||||
| #define GEMM_THREAD gemm_thread_mn | |||||
| #endif | |||||
| #ifdef ATHLON | #ifdef ATHLON | ||||
| #define SNUMOPT 4 | #define SNUMOPT 4 | ||||