This patch adds the basic infrastructure for adding the SkylakeX (Intel Skylake server) target. The SkylakeX target will use the AVX512 (AVX512VL level) instruction set, which brings 2 basic things: 1) 512-bit wide SIMD (2x the width of AVX2); 2) 32 SIMD registers (2x the number on AVX2). This initial patch only contains a trivial transformation of the Haswell SGEMM kernel to AVX512VL; more will follow later, but this patch aims to get the infrastructure in place for this "later". Full performance tuning has not been done yet; with more registers and wider SIMD it's in theory possible to retune the kernels, but even without that there's an interesting enough performance increase (30-40% range) with just this change. tags/v0.3.1
| @@ -62,6 +62,9 @@ ifeq ($(BINARY), 32) | |||||
| ifeq ($(TARGET), HASWELL) | ifeq ($(TARGET), HASWELL) | ||||
| GETARCH_FLAGS := -DFORCE_NEHALEM | GETARCH_FLAGS := -DFORCE_NEHALEM | ||||
| endif | endif | ||||
| ifeq ($(TARGET), SKYLAKEX) | |||||
| GETARCH_FLAGS := -DFORCE_NEHALEM | |||||
| endif | |||||
| ifeq ($(TARGET), SANDYBRIDGE) | ifeq ($(TARGET), SANDYBRIDGE) | ||||
| GETARCH_FLAGS := -DFORCE_NEHALEM | GETARCH_FLAGS := -DFORCE_NEHALEM | ||||
| endif | endif | ||||
| @@ -95,6 +98,9 @@ ifeq ($(BINARY), 32) | |||||
| ifeq ($(TARGET_CORE), HASWELL) | ifeq ($(TARGET_CORE), HASWELL) | ||||
| GETARCH_FLAGS := -DFORCE_NEHALEM | GETARCH_FLAGS := -DFORCE_NEHALEM | ||||
| endif | endif | ||||
| ifeq ($(TARGET_CORE), SKYLAKEX) | |||||
| GETARCH_FLAGS := -DFORCE_NEHALEM | |||||
| endif | |||||
| ifeq ($(TARGET_CORE), SANDYBRIDGE) | ifeq ($(TARGET_CORE), SANDYBRIDGE) | ||||
| GETARCH_FLAGS := -DFORCE_NEHALEM | GETARCH_FLAGS := -DFORCE_NEHALEM | ||||
| endif | endif | ||||
| @@ -467,7 +473,7 @@ ifneq ($(NO_AVX), 1) | |||||
| DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR | DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR | ||||
| endif | endif | ||||
| ifneq ($(NO_AVX2), 1) | ifneq ($(NO_AVX2), 1) | ||||
| DYNAMIC_CORE += HASWELL ZEN | |||||
| DYNAMIC_CORE += HASWELL ZEN SKYLAKEX | |||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -20,6 +20,7 @@ DUNNINGTON | |||||
| NEHALEM | NEHALEM | ||||
| SANDYBRIDGE | SANDYBRIDGE | ||||
| HASWELL | HASWELL | ||||
| SKYLAKEX | |||||
| ATOM | ATOM | ||||
| b)AMD CPU: | b)AMD CPU: | ||||
| @@ -56,6 +56,9 @@ if (DYNAMIC_ARCH) | |||||
| if (NOT NO_AVX2) | if (NOT NO_AVX2) | ||||
| set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN) | set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN) | ||||
| endif () | endif () | ||||
| if (NOT NO_AVX512) | |||||
| set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX) | |||||
| endif () | |||||
| endif () | endif () | ||||
| if (NOT DYNAMIC_CORE) | if (NOT DYNAMIC_CORE) | ||||
| @@ -33,7 +33,7 @@ endif () | |||||
| if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) | if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) | ||||
| message(STATUS "Compiling a ${BINARY}-bit binary.") | message(STATUS "Compiling a ${BINARY}-bit binary.") | ||||
| set(NO_AVX 1) | set(NO_AVX 1) | ||||
| if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE") | |||||
| if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX") | |||||
| set(TARGET "NEHALEM") | set(TARGET "NEHALEM") | ||||
| endif () | endif () | ||||
| if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN") | if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN") | ||||
| @@ -115,6 +115,7 @@ | |||||
| #define CORE_STEAMROLLER 25 | #define CORE_STEAMROLLER 25 | ||||
| #define CORE_EXCAVATOR 26 | #define CORE_EXCAVATOR 26 | ||||
| #define CORE_ZEN 27 | #define CORE_ZEN 27 | ||||
| #define CORE_SKYLAKEX 28 | |||||
| #define HAVE_SSE (1 << 0) | #define HAVE_SSE (1 << 0) | ||||
| #define HAVE_SSE2 (1 << 1) | #define HAVE_SSE2 (1 << 1) | ||||
| @@ -137,6 +138,7 @@ | |||||
| #define HAVE_AVX (1 << 18) | #define HAVE_AVX (1 << 18) | ||||
| #define HAVE_FMA4 (1 << 19) | #define HAVE_FMA4 (1 << 19) | ||||
| #define HAVE_FMA3 (1 << 20) | #define HAVE_FMA3 (1 << 20) | ||||
| #define HAVE_AVX512VL (1 << 21) | |||||
| #define CACHE_INFO_L1_I 1 | #define CACHE_INFO_L1_I 1 | ||||
| #define CACHE_INFO_L1_D 2 | #define CACHE_INFO_L1_D 2 | ||||
| @@ -211,5 +213,6 @@ typedef struct { | |||||
| #define CPUTYPE_STEAMROLLER 49 | #define CPUTYPE_STEAMROLLER 49 | ||||
| #define CPUTYPE_EXCAVATOR 50 | #define CPUTYPE_EXCAVATOR 50 | ||||
| #define CPUTYPE_ZEN 51 | #define CPUTYPE_ZEN 51 | ||||
| #define CPUTYPE_SKYLAKEX 52 | |||||
| #endif | #endif | ||||
| @@ -50,6 +50,8 @@ | |||||
| #ifdef NO_AVX | #ifdef NO_AVX | ||||
| #define CPUTYPE_HASWELL CPUTYPE_NEHALEM | #define CPUTYPE_HASWELL CPUTYPE_NEHALEM | ||||
| #define CORE_HASWELL CORE_NEHALEM | #define CORE_HASWELL CORE_NEHALEM | ||||
| #define CPUTYPE_SKYLAKEX CPUTYPE_NEHALEM | |||||
| #define CORE_SKYLAKEX CORE_NEHALEM | |||||
| #define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM | #define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM | ||||
| #define CORE_SANDYBRIDGE CORE_NEHALEM | #define CORE_SANDYBRIDGE CORE_NEHALEM | ||||
| #define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA | #define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA | ||||
| @@ -74,6 +74,7 @@ extern gotoblas_t gotoblas_STEAMROLLER; | |||||
| extern gotoblas_t gotoblas_EXCAVATOR; | extern gotoblas_t gotoblas_EXCAVATOR; | ||||
| #ifdef NO_AVX2 | #ifdef NO_AVX2 | ||||
| #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | ||||
| #define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE | |||||
| #define gotoblas_ZEN gotoblas_SANDYBRIDGE | #define gotoblas_ZEN gotoblas_SANDYBRIDGE | ||||
| #else | #else | ||||
| extern gotoblas_t gotoblas_HASWELL; | extern gotoblas_t gotoblas_HASWELL; | ||||
| @@ -83,6 +84,7 @@ extern gotoblas_t gotoblas_ZEN; | |||||
| //Use NEHALEM kernels for sandy bridge | //Use NEHALEM kernels for sandy bridge | ||||
| #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | ||||
| #define gotoblas_HASWELL gotoblas_NEHALEM | #define gotoblas_HASWELL gotoblas_NEHALEM | ||||
| #define gotoblas_SKYLAKEX gotoblas_NEHALEM | |||||
| #define gotoblas_BULLDOZER gotoblas_BARCELONA | #define gotoblas_BULLDOZER gotoblas_BARCELONA | ||||
| #define gotoblas_PILEDRIVER gotoblas_BARCELONA | #define gotoblas_PILEDRIVER gotoblas_BARCELONA | ||||
| #define gotoblas_STEAMROLLER gotoblas_BARCELONA | #define gotoblas_STEAMROLLER gotoblas_BARCELONA | ||||
| @@ -167,7 +167,7 @@ int get_L2_size(void){ | |||||
| #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | ||||
| defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | ||||
| defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ | defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ | ||||
| defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) | |||||
| defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX) | |||||
| cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | ||||
| @@ -251,7 +251,7 @@ int get_L2_size(void){ | |||||
| void blas_set_parameter(void){ | void blas_set_parameter(void){ | ||||
| int factor; | int factor; | ||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) | |||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX) | |||||
| int size = 16; | int size = 16; | ||||
| #else | #else | ||||
| int size = get_L2_size(); | int size = get_L2_size(); | ||||
| @@ -326,6 +326,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CORENAME "HASWELL" | #define CORENAME "HASWELL" | ||||
| #endif | #endif | ||||
| #ifdef FORCE_SKYLAKEX | |||||
| #define FORCE | |||||
| #define FORCE_INTEL | |||||
| #define ARCHITECTURE "X86" | |||||
| #define SUBARCHITECTURE "SKYLAKEX" | |||||
| #define ARCHCONFIG "-DSKYLAKEX " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ | |||||
| "-DFMA3 -DHAVE_AVX512VL -march=skylake-avx512" | |||||
| #define LIBNAME "skylakex" | |||||
| #define CORENAME "SKYLAKEX" | |||||
| #endif | |||||
| #ifdef FORCE_ATOM | #ifdef FORCE_ATOM | ||||
| #define FORCE | #define FORCE | ||||
| #define FORCE_INTEL | #define FORCE_INTEL | ||||
| @@ -121,7 +121,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
| # Makefile.L3 | # Makefile.L3 | ||||
| set(USE_TRMM false) | set(USE_TRMM false) | ||||
| if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen") | |||||
| if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen" OR "${TARGET_CORE}" STREQUAL "SKYLAKEX" OR "${CORE}" STREQUAL "skylakex") | |||||
| set(USE_TRMM true) | set(USE_TRMM true) | ||||
| endif () | endif () | ||||
| @@ -32,6 +32,10 @@ ifeq ($(CORE), HASWELL) | |||||
| USE_TRMM = 1 | USE_TRMM = 1 | ||||
| endif | endif | ||||
| ifeq ($(CORE), SKYLAKEX) | |||||
| USE_TRMM = 1 | |||||
| endif | |||||
| ifeq ($(CORE), ZEN) | ifeq ($(CORE), ZEN) | ||||
| USE_TRMM = 1 | USE_TRMM = 1 | ||||
| endif | endif | ||||
| @@ -871,6 +871,22 @@ static void init_parameter(void) { | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #ifdef SKYLAKEX | |||||
| #ifdef DEBUG | |||||
| fprintf(stderr, "SkylakeX\n"); | |||||
| #endif | |||||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||||
| #ifdef EXPRECISION | |||||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||||
| #endif | |||||
| #endif | |||||
| #ifdef OPTERON | #ifdef OPTERON | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -62,7 +62,7 @@ | |||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
| #endif | #endif | ||||
| @@ -61,7 +61,7 @@ | |||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| @@ -63,7 +63,7 @@ | |||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| @@ -61,7 +61,7 @@ | |||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| @@ -63,7 +63,7 @@ | |||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| @@ -61,7 +61,7 @@ | |||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
| #define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,4 @@ | |||||
| include $(KERNELDIR)/KERNEL.HASWELL | |||||
| SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S | |||||
| @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "caxpy_microk_steamroller-2.c" | #include "caxpy_microk_steamroller-2.c" | ||||
| #elif defined(BULLDOZER) | #elif defined(BULLDOZER) | ||||
| #include "caxpy_microk_bulldozer-2.c" | #include "caxpy_microk_bulldozer-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX) | |||||
| #include "caxpy_microk_haswell-2.c" | #include "caxpy_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "caxpy_microk_sandy-2.c" | #include "caxpy_microk_sandy-2.c" | ||||
| @@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "cdot_microk_bulldozer-2.c" | #include "cdot_microk_bulldozer-2.c" | ||||
| #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | ||||
| #include "cdot_microk_steamroller-2.c" | #include "cdot_microk_steamroller-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "cdot_microk_haswell-2.c" | #include "cdot_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "cdot_microk_sandy-2.c" | #include "cdot_microk_sandy-2.c" | ||||
| @@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) || defined(ZEN) | |||||
| #if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "cgemv_n_microk_haswell-4.c" | #include "cgemv_n_microk_haswell-4.c" | ||||
| #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "cgemv_n_microk_bulldozer-4.c" | #include "cgemv_n_microk_bulldozer-4.c" | ||||
| @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) || defined(ZEN) | |||||
| #if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "cgemv_t_microk_haswell-4.c" | #include "cgemv_t_microk_haswell-4.c" | ||||
| #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "cgemv_t_microk_bulldozer-4.c" | #include "cgemv_t_microk_bulldozer-4.c" | ||||
| @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) || defined(ZEN) | |||||
| #if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "cscal_microk_haswell-2.c" | #include "cscal_microk_haswell-2.c" | ||||
| #elif defined(BULLDOZER) || defined(PILEDRIVER) | #elif defined(BULLDOZER) || defined(PILEDRIVER) | ||||
| #include "cscal_microk_bulldozer-2.c" | #include "cscal_microk_bulldozer-2.c" | ||||
| @@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "daxpy_microk_steamroller-2.c" | #include "daxpy_microk_steamroller-2.c" | ||||
| #elif defined(PILEDRIVER) | #elif defined(PILEDRIVER) | ||||
| #include "daxpy_microk_piledriver-2.c" | #include "daxpy_microk_piledriver-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "daxpy_microk_haswell-2.c" | #include "daxpy_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "daxpy_microk_sandy-2.c" | #include "daxpy_microk_sandy-2.c" | ||||
| @@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "ddot_microk_piledriver-2.c" | #include "ddot_microk_piledriver-2.c" | ||||
| #elif defined(NEHALEM) | #elif defined(NEHALEM) | ||||
| #include "ddot_microk_nehalem-2.c" | #include "ddot_microk_nehalem-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "ddot_microk_haswell-2.c" | #include "ddot_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "ddot_microk_sandy-2.c" | #include "ddot_microk_sandy-2.c" | ||||
| @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(NEHALEM) | #if defined(NEHALEM) | ||||
| #include "dgemv_n_microk_nehalem-4.c" | #include "dgemv_n_microk_nehalem-4.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX) | |||||
| #include "dgemv_n_microk_haswell-4.c" | #include "dgemv_n_microk_haswell-4.c" | ||||
| #endif | #endif | ||||
| @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||||
| #if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX) | |||||
| #include "dgemv_t_microk_haswell-4.c" | #include "dgemv_t_microk_haswell-4.c" | ||||
| #endif | #endif | ||||
| @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "dscal_microk_bulldozer-2.c" | #include "dscal_microk_bulldozer-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "dscal_microk_sandy-2.c" | #include "dscal_microk_sandy-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "dscal_microk_haswell-2.c" | #include "dscal_microk_haswell-2.c" | ||||
| #endif | #endif | ||||
| @@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "dsymv_L_microk_bulldozer-2.c" | #include "dsymv_L_microk_bulldozer-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "dsymv_L_microk_haswell-2.c" | #include "dsymv_L_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "dsymv_L_microk_sandy-2.c" | #include "dsymv_L_microk_sandy-2.c" | ||||
| @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "dsymv_U_microk_bulldozer-2.c" | #include "dsymv_U_microk_bulldozer-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "dsymv_U_microk_haswell-2.c" | #include "dsymv_U_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "dsymv_U_microk_sandy-2.c" | #include "dsymv_U_microk_sandy-2.c" | ||||
| @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(NEHALEM) | #if defined(NEHALEM) | ||||
| #include "saxpy_microk_nehalem-2.c" | #include "saxpy_microk_nehalem-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "saxpy_microk_haswell-2.c" | #include "saxpy_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "saxpy_microk_sandy-2.c" | #include "saxpy_microk_sandy-2.c" | ||||
| @@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "sdot_microk_steamroller-2.c" | #include "sdot_microk_steamroller-2.c" | ||||
| #elif defined(NEHALEM) | #elif defined(NEHALEM) | ||||
| #include "sdot_microk_nehalem-2.c" | #include "sdot_microk_nehalem-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "sdot_microk_haswell-2.c" | #include "sdot_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "sdot_microk_sandy-2.c" | #include "sdot_microk_sandy-2.c" | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "sgemv_n_microk_nehalem-4.c" | #include "sgemv_n_microk_nehalem-4.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "sgemv_n_microk_sandy-4.c" | #include "sgemv_n_microk_sandy-4.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "sgemv_n_microk_haswell-4.c" | #include "sgemv_n_microk_haswell-4.c" | ||||
| #endif | #endif | ||||
| @@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "sgemv_t_microk_bulldozer-4.c" | #include "sgemv_t_microk_bulldozer-4.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "sgemv_t_microk_sandy-4.c" | #include "sgemv_t_microk_sandy-4.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "sgemv_t_microk_haswell-4.c" | #include "sgemv_t_microk_haswell-4.c" | ||||
| #endif | #endif | ||||
| @@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "ssymv_L_microk_bulldozer-2.c" | #include "ssymv_L_microk_bulldozer-2.c" | ||||
| #elif defined(NEHALEM) | #elif defined(NEHALEM) | ||||
| #include "ssymv_L_microk_nehalem-2.c" | #include "ssymv_L_microk_nehalem-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "ssymv_L_microk_haswell-2.c" | #include "ssymv_L_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "ssymv_L_microk_sandy-2.c" | #include "ssymv_L_microk_sandy-2.c" | ||||
| @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "ssymv_U_microk_bulldozer-2.c" | #include "ssymv_U_microk_bulldozer-2.c" | ||||
| #elif defined(NEHALEM) | #elif defined(NEHALEM) | ||||
| #include "ssymv_U_microk_nehalem-2.c" | #include "ssymv_U_microk_nehalem-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "ssymv_U_microk_haswell-2.c" | #include "ssymv_U_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "ssymv_U_microk_sandy-2.c" | #include "ssymv_U_microk_sandy-2.c" | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "zaxpy_microk_bulldozer-2.c" | #include "zaxpy_microk_bulldozer-2.c" | ||||
| #elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "zaxpy_microk_steamroller-2.c" | #include "zaxpy_microk_steamroller-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "zaxpy_microk_haswell-2.c" | #include "zaxpy_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "zaxpy_microk_sandy-2.c" | #include "zaxpy_microk_sandy-2.c" | ||||
| @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "zdot_microk_bulldozer-2.c" | #include "zdot_microk_bulldozer-2.c" | ||||
| #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | ||||
| #include "zdot_microk_steamroller-2.c" | #include "zdot_microk_steamroller-2.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "zdot_microk_haswell-2.c" | #include "zdot_microk_haswell-2.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "zdot_microk_sandy-2.c" | #include "zdot_microk_sandy-2.c" | ||||
| @@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) || defined(ZEN) | |||||
| #if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "zgemv_n_microk_haswell-4.c" | #include "zgemv_n_microk_haswell-4.c" | ||||
| #elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
| #include "zgemv_n_microk_sandy-4.c" | #include "zgemv_n_microk_sandy-4.c" | ||||
| @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
| #include "zgemv_t_microk_bulldozer-4.c" | #include "zgemv_t_microk_bulldozer-4.c" | ||||
| #elif defined(HASWELL) || defined(ZEN) | |||||
| #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "zgemv_t_microk_haswell-4.c" | #include "zgemv_t_microk_haswell-4.c" | ||||
| #endif | #endif | ||||
| @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(HASWELL) || defined(ZEN) | |||||
| #if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #include "zscal_microk_haswell-2.c" | #include "zscal_microk_haswell-2.c" | ||||
| #elif defined(BULLDOZER) || defined(PILEDRIVER) | #elif defined(BULLDOZER) || defined(PILEDRIVER) | ||||
| #include "zscal_microk_bulldozer-2.c" | #include "zscal_microk_bulldozer-2.c" | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -57,7 +57,7 @@ | |||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| #endif | #endif | ||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) | |||||
| #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) | |||||
| #define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
| #define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
| #define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
| @@ -1613,6 +1613,125 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | |||||
| #ifdef SKYLAKEX | |||||
| #define SNUMOPT 16 | |||||
| #define DNUMOPT 8 | |||||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||||
| #define SYMV_P 8 | |||||
| #define SWITCH_RATIO 4 | |||||
| #ifdef ARCH_X86 | |||||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define QGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 1 | |||||
| #define XGEMM_DEFAULT_UNROLL_M 1 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define QGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define XGEMM_DEFAULT_UNROLL_N 1 | |||||
| #else | |||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define QGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 8 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define XGEMM_DEFAULT_UNROLL_M 1 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 8 | |||||
| #define QGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define XGEMM_DEFAULT_UNROLL_N 1 | |||||
| #define SGEMM_DEFAULT_UNROLL_MN 32 | |||||
| #define DGEMM_DEFAULT_UNROLL_MN 32 | |||||
| #endif | |||||
| #ifdef ARCH_X86 | |||||
| #define SGEMM_DEFAULT_P 512 | |||||
| #define SGEMM_DEFAULT_R sgemm_r | |||||
| #define DGEMM_DEFAULT_P 512 | |||||
| #define DGEMM_DEFAULT_R dgemm_r | |||||
| #define QGEMM_DEFAULT_P 504 | |||||
| #define QGEMM_DEFAULT_R qgemm_r | |||||
| #define CGEMM_DEFAULT_P 128 | |||||
| #define CGEMM_DEFAULT_R 1024 | |||||
| #define ZGEMM_DEFAULT_P 512 | |||||
| #define ZGEMM_DEFAULT_R zgemm_r | |||||
| #define XGEMM_DEFAULT_P 252 | |||||
| #define XGEMM_DEFAULT_R xgemm_r | |||||
| #define SGEMM_DEFAULT_Q 256 | |||||
| #define DGEMM_DEFAULT_Q 256 | |||||
| #define QGEMM_DEFAULT_Q 128 | |||||
| #define CGEMM_DEFAULT_Q 256 | |||||
| #define ZGEMM_DEFAULT_Q 192 | |||||
| #define XGEMM_DEFAULT_Q 128 | |||||
| #else | |||||
| #define SGEMM_DEFAULT_P 768 | |||||
| #define DGEMM_DEFAULT_P 512 | |||||
| #define CGEMM_DEFAULT_P 384 | |||||
| #define ZGEMM_DEFAULT_P 256 | |||||
| #ifdef WINDOWS_ABI | |||||
| #define SGEMM_DEFAULT_Q 320 | |||||
| #define DGEMM_DEFAULT_Q 128 | |||||
| #else | |||||
| #define SGEMM_DEFAULT_Q 384 | |||||
| #define DGEMM_DEFAULT_Q 256 | |||||
| #endif | |||||
| #define CGEMM_DEFAULT_Q 192 | |||||
| #define ZGEMM_DEFAULT_Q 128 | |||||
| #define SGEMM_DEFAULT_R sgemm_r | |||||
| #define DGEMM_DEFAULT_R 13824 | |||||
| #define CGEMM_DEFAULT_R cgemm_r | |||||
| #define ZGEMM_DEFAULT_R zgemm_r | |||||
| #define QGEMM_DEFAULT_Q 128 | |||||
| #define QGEMM_DEFAULT_P 504 | |||||
| #define QGEMM_DEFAULT_R qgemm_r | |||||
| #define XGEMM_DEFAULT_P 252 | |||||
| #define XGEMM_DEFAULT_R xgemm_r | |||||
| #define XGEMM_DEFAULT_Q 128 | |||||
| #define CGEMM3M_DEFAULT_UNROLL_N 8 | |||||
| #define CGEMM3M_DEFAULT_UNROLL_M 4 | |||||
| #define ZGEMM3M_DEFAULT_UNROLL_N 8 | |||||
| #define ZGEMM3M_DEFAULT_UNROLL_M 2 | |||||
| #define CGEMM3M_DEFAULT_P 448 | |||||
| #define ZGEMM3M_DEFAULT_P 224 | |||||
| #define XGEMM3M_DEFAULT_P 112 | |||||
| #define CGEMM3M_DEFAULT_Q 224 | |||||
| #define ZGEMM3M_DEFAULT_Q 224 | |||||
| #define XGEMM3M_DEFAULT_Q 224 | |||||
| #define CGEMM3M_DEFAULT_R 12288 | |||||
| #define ZGEMM3M_DEFAULT_R 12288 | |||||
| #define XGEMM3M_DEFAULT_R 12288 | |||||
| #endif | |||||
| #endif | #endif | ||||