| @@ -65,6 +65,9 @@ endif | |||
| ifeq ($(TARGET), STEAMROLLER) | |||
| GETARCH_FLAGS := -DFORCE_BARCELONA | |||
| endif | |||
| ifeq ($(TARGET), EXCAVATOR) | |||
| GETARCH_FLAGS := -DFORCE_BARCELONA | |||
| endif | |||
| endif | |||
| @@ -92,6 +95,9 @@ endif | |||
| ifeq ($(TARGET_CORE), STEAMROLLER) | |||
| GETARCH_FLAGS := -DFORCE_BARCELONA | |||
| endif | |||
| ifeq ($(TARGET_CORE), EXCAVATOR) | |||
| GETARCH_FLAGS := -DFORCE_BARCELONA | |||
| endif | |||
| endif | |||
| @@ -409,7 +415,7 @@ endif | |||
| ifeq ($(ARCH), x86_64) | |||
| DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
| ifneq ($(NO_AVX), 1) | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER | |||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR | |||
| endif | |||
| ifneq ($(NO_AVX2), 1) | |||
| DYNAMIC_CORE += HASWELL | |||
| @@ -33,6 +33,7 @@ BOBCAT | |||
| BULLDOZER | |||
| PILEDRIVER | |||
| STEAMROLLER | |||
| EXCAVATOR | |||
| c)VIA CPU: | |||
| SSE_GENERIC | |||
| @@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||
| #define MMXSTORE movd | |||
| #endif | |||
| #if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) | |||
| #if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||
| //Enable some optimization for barcelona. | |||
| #define BARCELONA_OPTIMIZATION | |||
| #endif | |||
| @@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||
| #ifdef ASSEMBLER | |||
| #if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) | |||
| #if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||
| //Enable some optimization for barcelona. | |||
| #define BARCELONA_OPTIMIZATION | |||
| #endif | |||
| @@ -109,6 +109,7 @@ | |||
| #define CORE_PILEDRIVER 23 | |||
| #define CORE_HASWELL 24 | |||
| #define CORE_STEAMROLLER 25 | |||
| #define CORE_EXCAVATOR 26 | |||
| #define HAVE_SSE (1 << 0) | |||
| #define HAVE_SSE2 (1 << 1) | |||
| @@ -203,5 +204,6 @@ typedef struct { | |||
| #define CPUTYPE_PILEDRIVER 47 | |||
| #define CPUTYPE_HASWELL 48 | |||
| #define CPUTYPE_STEAMROLLER 49 | |||
| #define CPUTYPE_EXCAVATOR 50 | |||
| #endif | |||
| @@ -1198,11 +1198,20 @@ int get_cpuname(void){ | |||
| else | |||
| return CPUTYPE_BARCELONA; //OS don't support AVX. | |||
| case 0: | |||
| if(support_avx()) | |||
| return CPUTYPE_STEAMROLLER; | |||
| else | |||
| return CPUTYPE_BARCELONA; //OS don't support AVX. | |||
| switch(exmodel){ | |||
| case 3: | |||
| if(support_avx()) | |||
| return CPUTYPE_STEAMROLLER; | |||
| else | |||
| return CPUTYPE_BARCELONA; //OS don't support AVX. | |||
| case 6: | |||
| if(support_avx()) | |||
| return CPUTYPE_EXCAVATOR; | |||
| else | |||
| return CPUTYPE_BARCELONA; //OS don't support AVX. | |||
| } | |||
| break; | |||
| } | |||
| break; | |||
| case 5: | |||
| @@ -1332,6 +1341,7 @@ static char *cpuname[] = { | |||
| "PILEDRIVER", | |||
| "HASWELL", | |||
| "STEAMROLLER", | |||
| "EXCAVATOR", | |||
| }; | |||
| static char *lowercpuname[] = { | |||
| @@ -1384,6 +1394,7 @@ static char *lowercpuname[] = { | |||
| "piledriver", | |||
| "haswell", | |||
| "steamroller", | |||
| "excavator", | |||
| }; | |||
| static char *corename[] = { | |||
| @@ -1413,6 +1424,7 @@ static char *corename[] = { | |||
| "PILEDRIVER", | |||
| "HASWELL", | |||
| "STEAMROLLER", | |||
| "EXCAVATOR", | |||
| }; | |||
| static char *corename_lower[] = { | |||
| @@ -1442,6 +1454,7 @@ static char *corename_lower[] = { | |||
| "piledriver", | |||
| "haswell", | |||
| "steamroller", | |||
| "excavator", | |||
| }; | |||
| @@ -1644,10 +1657,20 @@ int get_coretype(void){ | |||
| return CORE_BARCELONA; //OS don't support AVX. | |||
| case 0: | |||
| if(support_avx()) | |||
| return CORE_STEAMROLLER; | |||
| else | |||
| return CORE_BARCELONA; //OS don't support AVX. | |||
| switch(exmodel){ | |||
| case 3: | |||
| if(support_avx()) | |||
| return CORE_STEAMROLLER; | |||
| else | |||
| return CORE_BARCELONA; //OS don't support AVX. | |||
| case 6: | |||
| if(support_avx()) | |||
| return CORE_EXCAVATOR; | |||
| else | |||
| return CORE_BARCELONA; //OS don't support AVX. | |||
| } | |||
| break; | |||
| } | |||
| @@ -67,6 +67,7 @@ extern gotoblas_t gotoblas_SANDYBRIDGE; | |||
| extern gotoblas_t gotoblas_BULLDOZER; | |||
| extern gotoblas_t gotoblas_PILEDRIVER; | |||
| extern gotoblas_t gotoblas_STEAMROLLER; | |||
| extern gotoblas_t gotoblas_EXCAVATOR; | |||
| #ifdef NO_AVX2 | |||
| #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | |||
| #else | |||
| @@ -79,6 +80,7 @@ extern gotoblas_t gotoblas_HASWELL; | |||
| #define gotoblas_BULLDOZER gotoblas_BARCELONA | |||
| #define gotoblas_PILEDRIVER gotoblas_BARCELONA | |||
| #define gotoblas_STEAMROLLER gotoblas_BARCELONA | |||
| #define gotoblas_EXCAVATOR gotoblas_BARCELONA | |||
| #endif | |||
| @@ -307,12 +309,22 @@ static gotoblas_t *get_coretype(void){ | |||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| }else if(model == 0){ | |||
| //AMD STEAMROLLER | |||
| if(support_avx()) | |||
| return &gotoblas_STEAMROLLER; | |||
| else{ | |||
| openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | |||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||
| if (exmodel == 3) { | |||
| //AMD STEAMROLLER | |||
| if(support_avx()) | |||
| return &gotoblas_STEAMROLLER; | |||
| else{ | |||
| openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | |||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| }else if (exmodel == 6) { | |||
| if(support_avx()) | |||
| return &gotoblas_EXCAVATOR; | |||
| else{ | |||
| openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | |||
| return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||
| } | |||
| } | |||
| } | |||
| @@ -357,6 +369,7 @@ static char *corename[] = { | |||
| "Piledriver", | |||
| "Haswell", | |||
| "Steamroller", | |||
| "Excavator", | |||
| }; | |||
| char *gotoblas_corename(void) { | |||
| @@ -382,6 +395,7 @@ char *gotoblas_corename(void) { | |||
| if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; | |||
| if (gotoblas == &gotoblas_HASWELL) return corename[20]; | |||
| if (gotoblas == &gotoblas_STEAMROLLER) return corename[21]; | |||
| if (gotoblas == &gotoblas_EXCAVATOR) return corename[22]; | |||
| return corename[0]; | |||
| } | |||
| @@ -412,7 +426,7 @@ static gotoblas_t *force_coretype(char *coretype){ | |||
| switch (found) | |||
| { | |||
| case 22: return (&gotoblas_EXCAVATOR); | |||
| case 21: return (&gotoblas_STEAMROLLER); | |||
| case 20: return (&gotoblas_HASWELL); | |||
| case 19: return (&gotoblas_PILEDRIVER); | |||
| @@ -448,6 +448,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "STEAMROLLER" | |||
| #endif | |||
| /* Settings applied when the build forces the EXCAVATOR target (FORCE_EXCAVATOR). */ | |||
| /* Defines the architecture/sub-architecture names, the ARCHCONFIG flag string, */ | |||
| /* and the library/core names reported for this target. */ | |||
| #if defined (FORCE_EXCAVATOR) | |||
| #define FORCE | |||
| /* NOTE(review): FORCE_INTEL is defined for this target too; presumably it selects */ | |||
| /* the generic x86 handling rather than an Intel-only path -- confirm against its uses. */ | |||
| #define FORCE_INTEL | |||
| #define ARCHITECTURE "X86" | |||
| #define SUBARCHITECTURE "EXCAVATOR" | |||
| /* ARCHCONFIG is one string of -D flags: the core macro, cache and TLB figures */ | |||
| /* (16384 = 16 Ki, 2097152 = 2 Mi, 12582912 = 12 Mi; presumably bytes -- confirm), */ | |||
| /* followed by the HAVE_* instruction-set feature macros, ending with AVX, FMA4 and FMA3. */ | |||
| #define ARCHCONFIG "-DEXCAVATOR " \ | |||
| "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \ | |||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL3_SIZE=12582912 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \ | |||
| "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \ | |||
| "-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3" | |||
| #define LIBNAME "excavator" | |||
| #define CORENAME "EXCAVATOR" | |||
| #endif | |||
| #ifdef FORCE_SSE_GENERIC | |||
| #define FORCE | |||
| @@ -0,0 +1,92 @@ | |||
| # Source files assigned to the AXPY, DOT, SYMV, GEMV and DCOPY kernel variables. | |||
| # NOTE(review): this looks like a per-target kernel list (presumably the EXCAVATOR one); | |||
| # the file name is not visible in this view -- confirm. | |||
| SAXPYKERNEL = saxpy.c | |||
| DAXPYKERNEL = daxpy.c | |||
| CAXPYKERNEL = caxpy.c | |||
| ZAXPYKERNEL = zaxpy.c | |||
| SDOTKERNEL = sdot.c | |||
| DDOTKERNEL = ddot.c | |||
| CDOTKERNEL = cdot.c | |||
| ZDOTKERNEL = zdot.c | |||
| DSYMV_U_KERNEL = dsymv_U.c | |||
| DSYMV_L_KERNEL = dsymv_L.c | |||
| SSYMV_U_KERNEL = ssymv_U.c | |||
| SSYMV_L_KERNEL = ssymv_L.c | |||
| SGEMVNKERNEL = sgemv_n_4.c | |||
| SGEMVTKERNEL = sgemv_t_4.c | |||
| DGEMVNKERNEL = dgemv_n_4.c | |||
| DGEMVTKERNEL = dgemv_t_4.c | |||
| # ZGEMVN is the only GEMV entry here that names an assembly (.S) file; the rest name .c files. | |||
| ZGEMVNKERNEL = zgemv_n_dup.S | |||
| ZGEMVTKERNEL = zgemv_t_4.c | |||
| # DCOPY names a file with a "bulldozer" suffix. | |||
| DCOPYKERNEL = dcopy_bulldozer.S | |||
| # SGEMM: compute kernel, the IN/IT/ON/OT copy sources, and the matching *OBJ names | |||
| # formed from $(TSUFFIX) and $(SUFFIX). The kernel file carries a "piledriver" suffix; | |||
| # the IN/IT copies come from ../generic and the ON/OT copies carry a "bulldozer" suffix. | |||
| SGEMMKERNEL = sgemm_kernel_16x2_piledriver.S | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_16.c | |||
| SGEMMONCOPY = gemm_ncopy_2_bulldozer.S | |||
| SGEMMOTCOPY = gemm_tcopy_2_bulldozer.S | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| # DGEMM: same layout as SGEMM, with an 8x2 kernel file and the 8-wide generic IN/IT copies. | |||
| DGEMMKERNEL = dgemm_kernel_8x2_piledriver.S | |||
| DGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
| DGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
| DGEMMONCOPY = gemm_ncopy_2_bulldozer.S | |||
| DGEMMOTCOPY = gemm_tcopy_2_bulldozer.S | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| # CGEMM: "piledriver" 4x2 kernel file; all four copy sources come from ../generic. | |||
| CGEMMKERNEL = cgemm_kernel_4x2_piledriver.S | |||
| CGEMMINCOPY = ../generic/zgemm_ncopy_4.c | |||
| CGEMMITCOPY = ../generic/zgemm_tcopy_4.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| # ZGEMM: the IN/IT copy sources and their *OBJ names are left empty; only ON/OT are set. | |||
| # NOTE(review): presumably the 2x2 kernel needs no separate inner copies -- confirm. | |||
| ZGEMMKERNEL = zgemm_kernel_2x2_piledriver.S | |||
| ZGEMMINCOPY = | |||
| ZGEMMITCOPY = | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMINCOPYOBJ = | |||
| ZGEMMITCOPYOBJ = | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| # GEMM3M: both the C and Z variants name "zgemm3m_*_barcelona" files (8x4 and 4x4). | |||
| CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S | |||
| ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S | |||
| # TRSM kernels (LN/LT/RN/RT for S, D, C and Z). Every entry names the generic C file | |||
| # except DTRSMKERNEL_LT and DTRSMKERNEL_RN, which name "bulldozer" 8x2 assembly files. | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = dtrsm_kernel_LT_8x2_bulldozer.S | |||
| DTRSMKERNEL_RN = dtrsm_kernel_RN_8x2_bulldozer.S | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| @@ -499,6 +499,98 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #endif | |||
| /* Parameter set compiled in when EXCAVATOR is defined: GEMM offsets and alignment, */ | |||
| /* per-precision UNROLL_M/UNROLL_N defaults, and the P/Q/R defaults. */ | |||
| /* NOTE(review): confirm these figures were chosen for this core and not simply */ | |||
| /* carried over from another target's block. */ | |||
| #ifdef EXCAVATOR | |||
| #define SNUMOPT 8 | |||
| #define DNUMOPT 4 | |||
| #define GEMM_DEFAULT_OFFSET_A 64 | |||
| #define GEMM_DEFAULT_OFFSET_B 832 | |||
| #define GEMM_DEFAULT_ALIGN 0x0fffUL | |||
| /* UNROLL_N defaults shared by both the ARCH_X86 and the non-ARCH_X86 builds. */ | |||
| #define QGEMM_DEFAULT_UNROLL_N 2 | |||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||
| #define XGEMM_DEFAULT_UNROLL_N 1 | |||
| /* The remaining unroll defaults depend on whether ARCH_X86 is defined. */ | |||
| #ifdef ARCH_X86 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||
| #define QGEMM_DEFAULT_UNROLL_M 2 | |||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 1 | |||
| #define XGEMM_DEFAULT_UNROLL_M 1 | |||
| #else | |||
| #define SGEMM_DEFAULT_UNROLL_N 2 | |||
| #define DGEMM_DEFAULT_UNROLL_N 2 | |||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||
| #define DGEMM_DEFAULT_UNROLL_M 8 | |||
| #define QGEMM_DEFAULT_UNROLL_M 2 | |||
| #define CGEMM_DEFAULT_UNROLL_M 4 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||
| #define XGEMM_DEFAULT_UNROLL_M 1 | |||
| /* The GEMM3M unroll defaults and GEMV_UNROLL are defined only in this non-ARCH_X86 branch. */ | |||
| #define CGEMM3M_DEFAULT_UNROLL_N 4 | |||
| #define CGEMM3M_DEFAULT_UNROLL_M 8 | |||
| #define ZGEMM3M_DEFAULT_UNROLL_N 4 | |||
| #define ZGEMM3M_DEFAULT_UNROLL_M 4 | |||
| #define GEMV_UNROLL 8 | |||
| #endif | |||
| /* *_DEFAULT_P: one set for ARCH_X86_64 and one for every other build. */ | |||
| /* Note the test here is on ARCH_X86_64, whereas the unroll section tests ARCH_X86. */ | |||
| #if defined(ARCH_X86_64) | |||
| #define SGEMM_DEFAULT_P 768 | |||
| #define DGEMM_DEFAULT_P 576 | |||
| #define ZGEMM_DEFAULT_P 288 | |||
| #define CGEMM_DEFAULT_P 576 | |||
| #else | |||
| #define SGEMM_DEFAULT_P 448 | |||
| #define DGEMM_DEFAULT_P 480 | |||
| #define ZGEMM_DEFAULT_P 112 | |||
| #define CGEMM_DEFAULT_P 224 | |||
| #endif | |||
| #define QGEMM_DEFAULT_P 112 | |||
| #define XGEMM_DEFAULT_P 56 | |||
| /* *_DEFAULT_Q: again split on ARCH_X86_64; the Q and X variants are common to both. */ | |||
| #if defined(ARCH_X86_64) | |||
| #define SGEMM_DEFAULT_Q 192 | |||
| #define DGEMM_DEFAULT_Q 160 | |||
| #define ZGEMM_DEFAULT_Q 160 | |||
| #define CGEMM_DEFAULT_Q 160 | |||
| #else | |||
| #define SGEMM_DEFAULT_Q 224 | |||
| #define DGEMM_DEFAULT_Q 224 | |||
| #define ZGEMM_DEFAULT_Q 224 | |||
| #define CGEMM_DEFAULT_Q 224 | |||
| #endif | |||
| #define QGEMM_DEFAULT_Q 224 | |||
| #define XGEMM_DEFAULT_Q 224 | |||
| /* GEMM3M P/Q/R defaults; these do not depend on the architecture macros. */ | |||
| #define CGEMM3M_DEFAULT_P 448 | |||
| #define ZGEMM3M_DEFAULT_P 224 | |||
| #define XGEMM3M_DEFAULT_P 112 | |||
| #define CGEMM3M_DEFAULT_Q 224 | |||
| #define ZGEMM3M_DEFAULT_Q 224 | |||
| #define XGEMM3M_DEFAULT_Q 224 | |||
| #define CGEMM3M_DEFAULT_R 12288 | |||
| #define ZGEMM3M_DEFAULT_R 12288 | |||
| #define XGEMM3M_DEFAULT_R 12288 | |||
| /* *_DEFAULT_R: S and D use the literal 12288; Q, C, Z and X expand to the identifiers */ | |||
| /* qgemm_r, cgemm_r, zgemm_r and xgemm_r, which are not defined in this block */ | |||
| /* (presumably variables set elsewhere -- confirm). */ | |||
| #define SGEMM_DEFAULT_R 12288 | |||
| #define QGEMM_DEFAULT_R qgemm_r | |||
| #define DGEMM_DEFAULT_R 12288 | |||
| #define CGEMM_DEFAULT_R cgemm_r | |||
| #define ZGEMM_DEFAULT_R zgemm_r | |||
| #define XGEMM_DEFAULT_R xgemm_r | |||
| /* Remaining settings: SYMV_P value, the exclusive-cache flag, and the GEMM threading routine name. */ | |||
| #define SYMV_P 16 | |||
| #define HAVE_EXCLUSIVE_CACHE | |||
| #define GEMM_THREAD gemm_thread_mn | |||
| #endif | |||
| #ifdef ATHLON | |||
| #define SNUMOPT 4 | |||