Browse Source

Merge pull request #1133 from steckdenis/develop

Add ZEN support
tags/v0.2.20^2
Martin Kroeker GitHub 8 years ago
parent
commit
66dc10b019
46 changed files with 339 additions and 58 deletions
  1. +7
    -1
      Makefile.system
  2. +1
    -0
      TargetList.txt
  3. +1
    -1
      cmake/arch.cmake
  4. +1
    -1
      cmake/system.cmake
  5. +2
    -0
      cpuid.h
  6. +41
    -10
      cpuid_x86.c
  7. +24
    -11
      driver/others/dynamic.c
  8. +2
    -2
      driver/others/parameter.c
  9. +19
    -0
      getarch.c
  10. +1
    -1
      kernel/CMakeLists.txt
  11. +4
    -0
      kernel/Makefile.L3
  12. +16
    -0
      kernel/setparam-ref.c
  13. +1
    -0
      kernel/x86/KERNEL.ZEN
  14. +98
    -0
      kernel/x86_64/KERNEL.ZEN
  15. +1
    -1
      kernel/x86_64/caxpy.c
  16. +1
    -1
      kernel/x86_64/cdot.c
  17. +1
    -1
      kernel/x86_64/cgemv_n_4.c
  18. +1
    -1
      kernel/x86_64/cgemv_t_4.c
  19. +1
    -1
      kernel/x86_64/cscal.c
  20. +1
    -1
      kernel/x86_64/daxpy.c
  21. +1
    -1
      kernel/x86_64/ddot.c
  22. +1
    -1
      kernel/x86_64/dgemv_n_4.c
  23. +1
    -1
      kernel/x86_64/dgemv_t_4.c
  24. +1
    -1
      kernel/x86_64/dscal.c
  25. +1
    -1
      kernel/x86_64/dsymv_L.c
  26. +1
    -1
      kernel/x86_64/dsymv_U.c
  27. +1
    -1
      kernel/x86_64/saxpy.c
  28. +1
    -1
      kernel/x86_64/sdot.c
  29. +1
    -1
      kernel/x86_64/sgemv_n_4.c
  30. +1
    -1
      kernel/x86_64/sgemv_t_4.c
  31. +1
    -1
      kernel/x86_64/ssymv_L.c
  32. +1
    -1
      kernel/x86_64/ssymv_U.c
  33. +1
    -1
      kernel/x86_64/symv_L_sse.S
  34. +1
    -1
      kernel/x86_64/symv_L_sse2.S
  35. +1
    -1
      kernel/x86_64/symv_U_sse.S
  36. +1
    -1
      kernel/x86_64/symv_U_sse2.S
  37. +1
    -1
      kernel/x86_64/zaxpy.c
  38. +1
    -1
      kernel/x86_64/zdot.c
  39. +1
    -1
      kernel/x86_64/zgemv_n_4.c
  40. +1
    -1
      kernel/x86_64/zgemv_t_4.c
  41. +1
    -1
      kernel/x86_64/zscal.c
  42. +1
    -1
      kernel/x86_64/zsymv_L_sse.S
  43. +1
    -1
      kernel/x86_64/zsymv_L_sse2.S
  44. +1
    -1
      kernel/x86_64/zsymv_U_sse.S
  45. +1
    -1
      kernel/x86_64/zsymv_U_sse2.S
  46. +90
    -0
      param.h

+ 7
- 1
Makefile.system View File

@@ -68,6 +68,9 @@ endif
ifeq ($(TARGET), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET), ZEN)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif


@@ -98,6 +101,9 @@ endif
ifeq ($(TARGET_CORE), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET_CORE), ZEN)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif


@@ -443,7 +449,7 @@ ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
endif
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL
DYNAMIC_CORE += HASWELL ZEN
endif
endif



+ 1
- 0
TargetList.txt View File

@@ -34,6 +34,7 @@ BULLDOZER
PILEDRIVER
STEAMROLLER
EXCAVATOR
ZEN

c)VIA CPU:
SSE_GENERIC


+ 1
- 1
cmake/arch.cmake View File

@@ -73,7 +73,7 @@ if (DYNAMIC_ARCH)
set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
endif ()
if (NOT NO_AVX2)
set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL ZEN")
endif ()
endif ()



+ 1
- 1
cmake/system.cmake View File

@@ -22,7 +22,7 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
set(TARGET "NEHALEM")
endif ()
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER")
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
set(TARGET "BARCELONA")
endif ()
endif ()


+ 2
- 0
cpuid.h View File

@@ -114,6 +114,7 @@
#define CORE_HASWELL 24
#define CORE_STEAMROLLER 25
#define CORE_EXCAVATOR 26
#define CORE_ZEN 27

#define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1)
@@ -209,5 +210,6 @@ typedef struct {
#define CPUTYPE_HASWELL 48
#define CPUTYPE_STEAMROLLER 49
#define CPUTYPE_EXCAVATOR 50
#define CPUTYPE_ZEN 51

#endif

+ 41
- 10
cpuid_x86.c View File

@@ -1281,6 +1281,8 @@ int get_cpuname(void){
case 3:
case 10:
return CPUTYPE_BARCELONA;
case 5:
return CPUTYPE_BOBCAT;
case 6:
switch (model) {
case 1:
@@ -1295,8 +1297,8 @@ int get_cpuname(void){
return CPUTYPE_PILEDRIVER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
case 5: // New EXCAVATOR CPUS
if(support_avx())
case 5: // New EXCAVATOR CPUS
if(support_avx())
return CPUTYPE_EXCAVATOR;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
@@ -1322,8 +1324,19 @@ int get_cpuname(void){
break;
}
break;
case 5:
return CPUTYPE_BOBCAT;
case 8:
switch (model) {
case 1:
// AMD Ryzen
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_ZEN;
#else
return CPUTYPE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
#endif
else
return CPUTYPE_BARCELONA;
}
}
break;
}
@@ -1450,6 +1463,7 @@ static char *cpuname[] = {
"HASWELL",
"STEAMROLLER",
"EXCAVATOR",
"ZEN",
};

static char *lowercpuname[] = {
@@ -1503,6 +1517,7 @@ static char *lowercpuname[] = {
"haswell",
"steamroller",
"excavator",
"zen",
};

static char *corename[] = {
@@ -1533,6 +1548,7 @@ static char *corename[] = {
"HASWELL",
"STEAMROLLER",
"EXCAVATOR",
"ZEN",
};

static char *corename_lower[] = {
@@ -1563,6 +1579,7 @@ static char *corename_lower[] = {
"haswell",
"steamroller",
"excavator",
"zen",
};


@@ -1776,15 +1793,16 @@ int get_coretype(void){
break;
case 9:
case 8:
if (model == 14) // Kaby Lake
if (model == 14) { // Kaby Lake
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
}
}
break;

@@ -1841,9 +1859,22 @@ int get_coretype(void){
}
break;
}


}else return CORE_BARCELONA;
} else if (exfamily == 8) {
switch (model) {
case 1:
// AMD Ryzen
if(support_avx())
#ifndef NO_AVX2
return CORE_ZEN;
#else
return CORE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
#endif
else
return CORE_BARCELONA;
}
} else {
return CORE_BARCELONA;
}
}
}



+ 24
- 11
driver/others/dynamic.c View File

@@ -70,8 +70,10 @@ extern gotoblas_t gotoblas_STEAMROLLER;
extern gotoblas_t gotoblas_EXCAVATOR;
#ifdef NO_AVX2
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
#define gotoblas_ZEN gotoblas_SANDYBRIDGE
#else
extern gotoblas_t gotoblas_HASWELL;
extern gotoblas_t gotoblas_ZEN;
#endif
#else
//Use NEHALEM kernels for sandy bridge
@@ -81,6 +83,7 @@ extern gotoblas_t gotoblas_HASWELL;
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
#define gotoblas_EXCAVATOR gotoblas_BARCELONA
#define gotoblas_ZEN gotoblas_BARCELONA
#endif


@@ -355,14 +358,14 @@ static gotoblas_t *get_coretype(void){
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}else if(model == 5){
if(support_avx())
return &gotoblas_EXCAVATOR;
else{
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}else if(model == 0){
}else if(model == 5){
if(support_avx())
return &gotoblas_EXCAVATOR;
else{
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}else if(model == 0){
if (exmodel == 1) {
//AMD Trinity
if(support_avx())
@@ -389,9 +392,16 @@ static gotoblas_t *get_coretype(void){

}
}


} else {
} else if (exfamily == 8) {
/* AMD Zen (Ryzen). Zen parts report base family 0xF with extended
 * family 8, so the extended-family field is the one to compare against 8;
 * this mirrors the `exfamily == 8` test used by get_coretype() in
 * cpuid_x86.c.
 * NOTE(review): assumes `exfamily` is decoded next to `family`/`exmodel`
 * earlier in this function -- confirm. */
if (model == 1) {
if(support_avx())
return &gotoblas_ZEN;
else{
/* Same fallback as the other AVX-capable AMD cores in this function. */
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}
}else {
return &gotoblas_BARCELONA;
}
}
@@ -431,6 +441,7 @@ static char *corename[] = {
"Haswell",
"Steamroller",
"Excavator",
"Zen"
};

char *gotoblas_corename(void) {
@@ -457,6 +468,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_HASWELL) return corename[20];
if (gotoblas == &gotoblas_STEAMROLLER) return corename[21];
if (gotoblas == &gotoblas_EXCAVATOR) return corename[22];
if (gotoblas == &gotoblas_ZEN) return corename[23];

return corename[0];
}
@@ -487,6 +499,7 @@ static gotoblas_t *force_coretype(char *coretype){

switch (found)
{
case 23: return (&gotoblas_ZEN);
case 22: return (&gotoblas_EXCAVATOR);
case 21: return (&gotoblas_STEAMROLLER);
case 20: return (&gotoblas_HASWELL);


+ 2
- 2
driver/others/parameter.c View File

@@ -167,7 +167,7 @@ int get_L2_size(void){
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR)
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)

cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

@@ -251,7 +251,7 @@ int get_L2_size(void){
void blas_set_parameter(void){

int factor;
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR)
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
int size = 16;
#else
int size = get_L2_size();


+ 19
- 0
getarch.c View File

@@ -473,6 +473,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "EXCAVATOR"
#endif

/*
 * Forced-target description for AMD Zen (selected with -DFORCE_ZEN).
 *
 * Defines the architecture strings and the ARCHCONFIG flag list (cache/TLB
 * geometry and HAVE_* feature macros) emitted for this target.
 * LIBNAME/CORENAME must name Zen itself so they agree with the "zen"/"ZEN"
 * entries in the cpuid name tables.
 *
 * NOTE(review): "-DL2_CODE_ASSOCIATIVE=8" on the L1 data line looks like it
 * was meant to be "-DL1_DATA_ASSOCIATIVE=8"; left unchanged pending
 * confirmation against the other target blocks.
 */
#if defined (FORCE_ZEN)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#define SUBARCHITECTURE "ZEN"
#define ARCHCONFIG "-DZEN " \
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL2_CODE_ASSOCIATIVE=8 " \
"-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DL3_SIZE=16777216 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=8 " \
"-DITB_DEFAULT_ENTRIES=64 -DITB_SIZE=4096 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
"-DHAVE_AVX -DHAVE_FMA3 -DFMA3"
#define LIBNAME "zen"
#define CORENAME "ZEN"
#endif


#ifdef FORCE_SSE_GENERIC
#define FORCE


+ 1
- 1
kernel/CMakeLists.txt View File

@@ -118,7 +118,7 @@ endforeach ()
# Makefile.L3
set(USE_TRMM false)

if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell")
# Targets/cores that switch USE_TRMM on. The "zen" entry mirrors the
# `ifeq ($(CORE), ZEN)` rule in kernel/Makefile.L3; it must expand ${CORE}
# (a bare "{CORE}" is a literal string and can never equal "zen").
if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen")
set(USE_TRMM true)
endif ()



+ 4
- 0
kernel/Makefile.L3 View File

@@ -32,6 +32,10 @@ ifeq ($(CORE), HASWELL)
USE_TRMM = 1
endif

# ZEN sets USE_TRMM just like the HASWELL rule directly above.
ifeq ($(CORE), ZEN)
USE_TRMM = 1
endif

ifeq ($(CORE), POWER8)
USE_TRMM = 1
endif


+ 16
- 0
kernel/setparam-ref.c View File

@@ -982,6 +982,22 @@ static void init_parameter(void) {
#endif
#endif

/* ZEN: seed the GEMM P blocking factors in TABLE_NAME from the compile-time
 * *_DEFAULT_P values. */
#ifdef ZEN

#ifdef DEBUG
/* Announce which parameter set was compiled in (debug builds only). */
fprintf(stderr, "Zen\n");
#endif

TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
/* The q/x variants are only filled in when EXPRECISION is defined. */
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif


#ifdef NANO



+ 1
- 0
kernel/x86/KERNEL.ZEN View File

@@ -0,0 +1 @@
include $(KERNELDIR)/KERNEL.BARCELONA

+ 98
- 0
kernel/x86_64/KERNEL.ZEN View File

@@ -0,0 +1,98 @@
# Kernel selection for the ZEN target on x86_64.
# Every entry below names either a C wrapper in this directory, a
# *_haswell/*_nehalem kernel, or a routine under ../generic.

# SCAL kernels (C wrappers).
DSCALKERNEL = dscal.c
CSCALKERNEL = cscal.c
ZSCALKERNEL = zscal.c

# GEMV kernels, N and T variants, for S/D/Z/C.
SGEMVNKERNEL = sgemv_n_4.c
SGEMVTKERNEL = sgemv_t_4.c

DGEMVNKERNEL = dgemv_n_4.c
DGEMVTKERNEL = dgemv_t_4.c

ZGEMVNKERNEL = zgemv_n_4.c
ZGEMVTKERNEL = zgemv_t_4.c

CGEMVNKERNEL = cgemv_n_4.c
CGEMVTKERNEL = cgemv_t_4.c

# SYMV kernels (lower/upper) for S and D.
SSYMV_L_KERNEL = ssymv_L.c
SSYMV_U_KERNEL = ssymv_U.c
DSYMV_L_KERNEL = dsymv_L.c
DSYMV_U_KERNEL = dsymv_U.c

# DOT kernels.
SDOTKERNEL = sdot.c
DDOTKERNEL = ddot.c
CDOTKERNEL = cdot.c
ZDOTKERNEL = zdot.c

# AXPY kernels.
SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c
CAXPYKERNEL = caxpy.c
ZAXPYKERNEL = zaxpy.c

# SGEMM/STRMM: 16x4 Haswell kernel; packing uses the generic 16-wide (I*)
# and 4-wide (O*) copy routines.
STRMMKERNEL = sgemm_kernel_16x4_haswell.S
SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

# DGEMM/DTRMM: 4x8 Haswell kernels; packing uses the generic 4-wide (I*)
# and 8-wide (O*) copy routines.
DTRMMKERNEL = dtrmm_kernel_4x8_haswell.c
DGEMMKERNEL = dgemm_kernel_4x8_haswell.S
DGEMMINCOPY = ../generic/gemm_ncopy_4.c
DGEMMITCOPY = ../generic/gemm_tcopy_4.c
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

# CGEMM/CTRMM: 8x2 Haswell kernel; packing uses the generic 8-wide (I*)
# and 2-wide (O*) complex copy routines.
CTRMMKERNEL = cgemm_kernel_8x2_haswell.S
CGEMMKERNEL = cgemm_kernel_8x2_haswell.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)

# ZGEMM/ZTRMM: 4x2 Haswell kernel; packing uses the generic 4-wide (I*)
# and 2-wide (O*) complex copy routines.
ZTRMMKERNEL = zgemm_kernel_4x2_haswell.S
ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S
ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)

# TRSM: generic C kernels throughout, except DTRSM RN which uses the
# Haswell-specific implementation.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = dtrsm_kernel_RN_haswell.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

# GEMM3M: Nehalem kernels (4x8 for C, 2x8 for Z).
CGEMM3MKERNEL = zgemm3m_kernel_4x8_nehalem.S
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S


+ 1
- 1
kernel/x86_64/caxpy.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "caxpy_microk_steamroller-2.c"
#elif defined(BULLDOZER)
#include "caxpy_microk_bulldozer-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "caxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "caxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/cdot.c View File

@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "cdot_microk_steamroller-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "cdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "cdot_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/cgemv_n_4.c View File

@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
#include "common.h"

#if defined(HASWELL)
#if defined(HASWELL) || defined(ZEN)
#include "cgemv_n_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_n_microk_bulldozer-4.c"


+ 1
- 1
kernel/x86_64/cgemv_t_4.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "common.h"

#if defined(HASWELL)
#if defined(HASWELL) || defined(ZEN)
#include "cgemv_t_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_t_microk_bulldozer-4.c"


+ 1
- 1
kernel/x86_64/cscal.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"


#if defined(HASWELL)
#if defined(HASWELL) || defined(ZEN)
#include "cscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "cscal_microk_bulldozer-2.c"


+ 1
- 1
kernel/x86_64/daxpy.c View File

@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "daxpy_microk_steamroller-2.c"
#elif defined(PILEDRIVER)
#include "daxpy_microk_piledriver-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "daxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "daxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/ddot.c View File

@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ddot_microk_piledriver-2.c"
#elif defined(NEHALEM)
#include "ddot_microk_nehalem-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "ddot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ddot_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/dgemv_n_4.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(NEHALEM)
#include "dgemv_n_microk_nehalem-4.c"
#elif defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR)
#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dgemv_n_microk_haswell-4.c"
#endif



+ 1
- 1
kernel/x86_64/dgemv_t_4.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "common.h"

#if defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR)
#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dgemv_t_microk_haswell-4.c"
#endif



+ 1
- 1
kernel/x86_64/dscal.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dscal_microk_bulldozer-2.c"
#elif defined(SANDYBRIDGE)
#include "dscal_microk_sandy-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "dscal_microk_haswell-2.c"
#endif



+ 1
- 1
kernel/x86_64/dsymv_L.c View File

@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dsymv_L_microk_bulldozer-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "dsymv_L_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "dsymv_L_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/dsymv_U.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dsymv_U_microk_bulldozer-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "dsymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "dsymv_U_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/saxpy.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(NEHALEM)
#include "saxpy_microk_nehalem-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "saxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "saxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/sdot.c View File

@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sdot_microk_steamroller-2.c"
#elif defined(NEHALEM)
#include "sdot_microk_nehalem-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "sdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "sdot_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/sgemv_n_4.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_n_microk_nehalem-4.c"
#elif defined(SANDYBRIDGE)
#include "sgemv_n_microk_sandy-4.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "sgemv_n_microk_haswell-4.c"
#endif



+ 1
- 1
kernel/x86_64/sgemv_t_4.c View File

@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_t_microk_bulldozer-4.c"
#elif defined(SANDYBRIDGE)
#include "sgemv_t_microk_sandy-4.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "sgemv_t_microk_haswell-4.c"
#endif



+ 1
- 1
kernel/x86_64/ssymv_L.c View File

@@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ssymv_L_microk_bulldozer-2.c"
#elif defined(NEHALEM)
#include "ssymv_L_microk_nehalem-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "ssymv_L_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ssymv_L_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/ssymv_U.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ssymv_U_microk_bulldozer-2.c"
#elif defined(NEHALEM)
#include "ssymv_U_microk_nehalem-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "ssymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ssymv_U_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/symv_L_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_L_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_U_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_U_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zaxpy.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zaxpy_microk_bulldozer-2.c"
#elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zaxpy_microk_steamroller-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "zaxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "zaxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/zdot.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "zdot_microk_steamroller-2.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "zdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "zdot_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/zgemv_n_4.c View File

@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"


#if defined(HASWELL)
#if defined(HASWELL) || defined(ZEN)
#include "zgemv_n_microk_haswell-4.c"
#elif defined(SANDYBRIDGE)
#include "zgemv_n_microk_sandy-4.c"


+ 1
- 1
kernel/x86_64/zgemv_t_4.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zgemv_t_microk_bulldozer-4.c"
#elif defined(HASWELL)
#elif defined(HASWELL) || defined(ZEN)
#include "zgemv_t_microk_haswell-4.c"
#endif



+ 1
- 1
kernel/x86_64/zscal.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"


#if defined(HASWELL)
#if defined(HASWELL) || defined(ZEN)
#include "zscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "zscal_microk_bulldozer-2.c"


+ 1
- 1
kernel/x86_64/zsymv_L_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_L_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_U_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_U_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)


+ 90
- 0
param.h View File

@@ -595,6 +595,96 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#endif

/*
 * Blocking and unroll parameters for the ZEN target.
 *
 * On x86_64 the unroll factors must describe the kernels selected in
 * kernel/x86_64/KERNEL.ZEN, whose file names and packing routines fix the
 * tile shape (M x N):
 *   SGEMM    sgemm_kernel_16x4_haswell   (ncopy_16 / ncopy_4)   -> 16 x 4
 *   DGEMM    dgemm_kernel_4x8_haswell    (ncopy_4  / ncopy_8)   ->  4 x 8
 *   CGEMM    cgemm_kernel_8x2_haswell    (zncopy_8 / zncopy_2)  ->  8 x 2
 *   ZGEMM    zgemm_kernel_4x2_haswell    (zncopy_4 / zncopy_2)  ->  4 x 2
 *   CGEMM3M  zgemm3m_kernel_4x8_nehalem                         ->  4 x 8
 *   ZGEMM3M  zgemm3m_kernel_2x8_nehalem                         ->  2 x 8
 * The 32-bit (ARCH_X86) values are left as they were.
 */
#ifdef ZEN
#define SNUMOPT 8
#define DNUMOPT 4

#define GEMM_DEFAULT_OFFSET_A 64
#define GEMM_DEFAULT_OFFSET_B 832
#define GEMM_DEFAULT_ALIGN 0x0fffUL

/* N unroll shared by both word sizes. */
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#else
/* x86_64: match the Haswell/Nehalem kernels listed in KERNEL.ZEN. */
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 8
#define SGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_M 4
#define XGEMM_DEFAULT_UNROLL_M 1
#define CGEMM3M_DEFAULT_UNROLL_N 8
#define CGEMM3M_DEFAULT_UNROLL_M 4
#define ZGEMM3M_DEFAULT_UNROLL_N 8
#define ZGEMM3M_DEFAULT_UNROLL_M 2
#define GEMV_UNROLL 8
#endif

/* P blocking factors. */
#if defined(ARCH_X86_64)
#define SGEMM_DEFAULT_P 768
#define DGEMM_DEFAULT_P 576
#define ZGEMM_DEFAULT_P 288
#define CGEMM_DEFAULT_P 576
#else
#define SGEMM_DEFAULT_P 448
#define DGEMM_DEFAULT_P 480
#define ZGEMM_DEFAULT_P 112
#define CGEMM_DEFAULT_P 224
#endif
#define QGEMM_DEFAULT_P 112
#define XGEMM_DEFAULT_P 56

/* Q blocking factors. */
#if defined(ARCH_X86_64)
#define SGEMM_DEFAULT_Q 192
#define DGEMM_DEFAULT_Q 160
#define ZGEMM_DEFAULT_Q 160
#define CGEMM_DEFAULT_Q 160
#else
#define SGEMM_DEFAULT_Q 224
#define DGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 224
#define CGEMM_DEFAULT_Q 224
#endif
#define QGEMM_DEFAULT_Q 224
#define XGEMM_DEFAULT_Q 224

/* GEMM3M blocking factors. */
#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288

/* R blocking factors: fixed for S/D, taken from the *gemm_r symbols for
 * the remaining precisions. */
#define SGEMM_DEFAULT_R 12288
#define QGEMM_DEFAULT_R qgemm_r
#define DGEMM_DEFAULT_R 12288
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r

#define SYMV_P 16
#define HAVE_EXCLUSIVE_CACHE

#define GEMM_THREAD gemm_thread_mn

#endif

#ifdef ATHLON

#define SNUMOPT 4


Loading…
Cancel
Save