Browse Source

Merge pull request #483 from wernsaar/develop

Added Steamroller as a CPU target
tags/v0.2.14^2
Zhang Xianyi 11 years ago
parent
commit
17b9db20f1
18 changed files with 261 additions and 15 deletions
  1. +7
    -1
      Makefile.system
  2. +1
    -0
      README.md
  3. +1
    -0
      TargetList.txt
  4. +1
    -1
      common_x86.h
  5. +1
    -1
      common_x86_64.h
  6. +6
    -4
      cpuid.h
  7. +18
    -0
      cpuid_x86.c
  8. +16
    -1
      driver/others/dynamic.c
  9. +2
    -2
      driver/others/parameter.c
  10. +17
    -0
      getarch.c
  11. +17
    -0
      kernel/setparam-ref.c
  12. +76
    -0
      kernel/x86_64/KERNEL.STEAMROLLER
  13. +1
    -1
      kernel/x86_64/ddot.c
  14. +1
    -1
      kernel/x86_64/sdot.c
  15. +1
    -1
      kernel/x86_64/sgemv_n_4.c
  16. +1
    -1
      kernel/x86_64/sgemv_t_4.c
  17. +1
    -1
      kernel/x86_64/zgemv_t_4.c
  18. +93
    -0
      param.h

+ 7
- 1
Makefile.system View File

@@ -61,6 +61,9 @@ endif
ifeq ($(TARGET), PILEDRIVER) ifeq ($(TARGET), PILEDRIVER)
GETARCH_FLAGS := -DFORCE_BARCELONA GETARCH_FLAGS := -DFORCE_BARCELONA
endif endif
ifeq ($(TARGET), STEAMROLLER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif endif




@@ -85,6 +88,9 @@ endif
ifeq ($(TARGET_CORE), PILEDRIVER) ifeq ($(TARGET_CORE), PILEDRIVER)
GETARCH_FLAGS := -DFORCE_BARCELONA GETARCH_FLAGS := -DFORCE_BARCELONA
endif endif
ifeq ($(TARGET_CORE), STEAMROLLER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif endif




@@ -392,7 +398,7 @@ endif
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1) ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
endif endif
ifneq ($(NO_AVX2), 1) ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL DYNAMIC_CORE += HASWELL


+ 1
- 0
README.md View File

@@ -60,6 +60,7 @@ Please read GotoBLAS_01Readme.txt
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes. - **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar) - **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar)
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations. - **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.


#### MIPS64: #### MIPS64:
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2. - **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.


+ 1
- 0
TargetList.txt View File

@@ -32,6 +32,7 @@ ISTANBUL
BOBCAT BOBCAT
BULLDOZER BULLDOZER
PILEDRIVER PILEDRIVER
STEAMROLLER


c)VIA CPU: c)VIA CPU:
SSE_GENERIC SSE_GENERIC


+ 1
- 1
common_x86.h View File

@@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
#define MMXSTORE movd #define MMXSTORE movd
#endif #endif


#if defined(PILEDRIVER) || defined(BULLDOZER)
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
//Enable some optimization for barcelona. //Enable some optimization for barcelona.
#define BARCELONA_OPTIMIZATION #define BARCELONA_OPTIMIZATION
#endif #endif


+ 1
- 1
common_x86_64.h View File

@@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){


#ifdef ASSEMBLER #ifdef ASSEMBLER


#if defined(PILEDRIVER) || defined(BULLDOZER)
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
//Enable some optimization for barcelona. //Enable some optimization for barcelona.
#define BARCELONA_OPTIMIZATION #define BARCELONA_OPTIMIZATION
#endif #endif


+ 6
- 4
cpuid.h View File

@@ -104,10 +104,11 @@
#define CORE_ATOM 18 #define CORE_ATOM 18
#define CORE_NANO 19 #define CORE_NANO 19
#define CORE_SANDYBRIDGE 20 #define CORE_SANDYBRIDGE 20
#define CORE_BOBCAT 21
#define CORE_BULLDOZER 22
#define CORE_BOBCAT 21
#define CORE_BULLDOZER 22
#define CORE_PILEDRIVER 23 #define CORE_PILEDRIVER 23
#define CORE_HASWELL 24
#define CORE_HASWELL 24
#define CORE_STEAMROLLER 25


#define HAVE_SSE (1 << 0) #define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1) #define HAVE_SSE2 (1 << 1)
@@ -200,6 +201,7 @@ typedef struct {
#define CPUTYPE_BOBCAT 45 #define CPUTYPE_BOBCAT 45
#define CPUTYPE_BULLDOZER 46 #define CPUTYPE_BULLDOZER 46
#define CPUTYPE_PILEDRIVER 47 #define CPUTYPE_PILEDRIVER 47
#define CPUTYPE_HASWELL 48
#define CPUTYPE_HASWELL 48
#define CPUTYPE_STEAMROLLER 49


#endif #endif

+ 18
- 0
cpuid_x86.c View File

@@ -1162,6 +1162,12 @@ int get_cpuname(void){
return CPUTYPE_PILEDRIVER; return CPUTYPE_PILEDRIVER;
else else
return CPUTYPE_BARCELONA; //OS don't support AVX. return CPUTYPE_BARCELONA; //OS don't support AVX.
case 0:
if(support_avx())
return CPUTYPE_STEAMROLLER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.

} }
break; break;
case 5: case 5:
@@ -1290,6 +1296,7 @@ static char *cpuname[] = {
"BULLDOZER", "BULLDOZER",
"PILEDRIVER", "PILEDRIVER",
"HASWELL", "HASWELL",
"STEAMROLLER",
}; };


static char *lowercpuname[] = { static char *lowercpuname[] = {
@@ -1341,6 +1348,7 @@ static char *lowercpuname[] = {
"bulldozer", "bulldozer",
"piledriver", "piledriver",
"haswell", "haswell",
"steamroller",
}; };


static char *corename[] = { static char *corename[] = {
@@ -1369,6 +1377,7 @@ static char *corename[] = {
"BULLDOZER", "BULLDOZER",
"PILEDRIVER", "PILEDRIVER",
"HASWELL", "HASWELL",
"STEAMROLLER",
}; };


static char *corename_lower[] = { static char *corename_lower[] = {
@@ -1397,6 +1406,7 @@ static char *corename_lower[] = {
"bulldozer", "bulldozer",
"piledriver", "piledriver",
"haswell", "haswell",
"steamroller",
}; };




@@ -1562,7 +1572,15 @@ int get_coretype(void){
return CORE_PILEDRIVER; return CORE_PILEDRIVER;
else else
return CORE_BARCELONA; //OS don't support AVX. return CORE_BARCELONA; //OS don't support AVX.
case 0:
if(support_avx())
return CORE_STEAMROLLER;
else
return CORE_BARCELONA; //OS don't support AVX.
} }


}else return CORE_BARCELONA; }else return CORE_BARCELONA;
} }
} }


+ 16
- 1
driver/others/dynamic.c View File

@@ -66,6 +66,7 @@ extern gotoblas_t gotoblas_BOBCAT;
extern gotoblas_t gotoblas_SANDYBRIDGE; extern gotoblas_t gotoblas_SANDYBRIDGE;
extern gotoblas_t gotoblas_BULLDOZER; extern gotoblas_t gotoblas_BULLDOZER;
extern gotoblas_t gotoblas_PILEDRIVER; extern gotoblas_t gotoblas_PILEDRIVER;
extern gotoblas_t gotoblas_STEAMROLLER;
#ifdef NO_AVX2 #ifdef NO_AVX2
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE #define gotoblas_HASWELL gotoblas_SANDYBRIDGE
#else #else
@@ -77,6 +78,7 @@ extern gotoblas_t gotoblas_HASWELL;
#define gotoblas_HASWELL gotoblas_NEHALEM #define gotoblas_HASWELL gotoblas_NEHALEM
#define gotoblas_BULLDOZER gotoblas_BARCELONA #define gotoblas_BULLDOZER gotoblas_BARCELONA
#define gotoblas_PILEDRIVER gotoblas_BARCELONA #define gotoblas_PILEDRIVER gotoblas_BARCELONA
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
#endif #endif




@@ -275,7 +277,17 @@ static gotoblas_t *get_coretype(void){
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
} }
}else if(model == 0){
//AMD STEAMROLLER
if(support_avx())
return &gotoblas_STEAMROLLER;
else{
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
} }


} else { } else {
return &gotoblas_BARCELONA; return &gotoblas_BARCELONA;
} }
@@ -315,6 +327,7 @@ static char *corename[] = {
"Bulldozer", "Bulldozer",
"Piledriver", "Piledriver",
"Haswell", "Haswell",
"Steamroller",
}; };


char *gotoblas_corename(void) { char *gotoblas_corename(void) {
@@ -339,6 +352,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_BULLDOZER) return corename[18]; if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
if (gotoblas == &gotoblas_HASWELL) return corename[20]; if (gotoblas == &gotoblas_HASWELL) return corename[20];
if (gotoblas == &gotoblas_STEAMROLLER) return corename[21];


return corename[0]; return corename[0];
} }
@@ -351,7 +365,7 @@ static gotoblas_t *force_coretype(char *coretype){
char message[128]; char message[128];
char mname[20]; char mname[20];


for ( i=1 ; i <= 20; i++)
for ( i=1 ; i <= 21; i++)
{ {
if (!strncasecmp(coretype,corename[i],20)) if (!strncasecmp(coretype,corename[i],20))
{ {
@@ -370,6 +384,7 @@ static gotoblas_t *force_coretype(char *coretype){
switch (found) switch (found)
{ {


case 21: return (&gotoblas_STEAMROLLER);
case 20: return (&gotoblas_HASWELL); case 20: return (&gotoblas_HASWELL);
case 19: return (&gotoblas_PILEDRIVER); case 19: return (&gotoblas_PILEDRIVER);
case 18: return (&gotoblas_BULLDOZER); case 18: return (&gotoblas_BULLDOZER);


+ 2
- 2
driver/others/parameter.c View File

@@ -166,7 +166,7 @@ int get_L2_size(void){
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
defined(PILEDRIVER) || defined(HASWELL)
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER)


cpuid(0x80000006, &eax, &ebx, &ecx, &edx); cpuid(0x80000006, &eax, &ebx, &ecx, &edx);


@@ -251,7 +251,7 @@ void blas_set_parameter(void){


env_var_t p; env_var_t p;
int factor; int factor;
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL)
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER)
int size = 16; int size = 16;
#else #else
int size = get_L2_size(); int size = get_L2_size();


+ 17
- 0
getarch.c View File

@@ -432,6 +432,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "PILEDRIVER" #define CORENAME "PILEDRIVER"
#endif #endif


/*
 * Forced-target description for the Steamroller core, active only when
 * FORCE_STEAMROLLER is defined at compile time.
 *
 * The block defines:
 *   - FORCE and FORCE_INTEL marker macros (their consumers are outside this
 *     hunk).
 *     NOTE(review): FORCE_INTEL is set although the README lists Steamroller
 *     as an AMD core - confirm this matches the other AMD targets.
 *   - ARCHITECTURE "X86", SUBARCHITECTURE/CORENAME "STEAMROLLER" and
 *     LIBNAME "steamroller" as identification strings.
 *   - ARCHCONFIG, one string of -D options assembled from adjacent literals:
 *       -DSTEAMROLLER              target selector macro
 *       L1_DATA_SIZE=16384         (16 * 1024), L1_DATA_LINESIZE=64
 *       L2_SIZE=2097152            (2 * 1024 * 1024), L2_LINESIZE=64
 *       L3_SIZE=12582912           (12 * 1024 * 1024)
 *       DTB_DEFAULT_ENTRIES=64, DTB_SIZE=4096
 *       HAVE_* feature flags: MMX, SSE..SSE4_2, SSE4A, MISALIGNSSE,
 *       128BITFPU, FASTMOVU, CFLUSH, AVX, FMA4, FMA3
 *     Sizes are presumably in bytes - TODO confirm against the consumers of
 *     these macros.
 *
 * Every ARCHCONFIG fragment except the last ends with a space inside the
 * quotes so the concatenated options stay separated; keep that when editing.
 */
#if defined (FORCE_STEAMROLLER)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#define SUBARCHITECTURE "STEAMROLLER"
#define ARCHCONFIG "-DSTEAMROLLER " \
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL3_SIZE=12582912 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
"-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
#define LIBNAME "steamroller"
#define CORENAME "STEAMROLLER"
#endif


#ifdef FORCE_SSE_GENERIC #ifdef FORCE_SSE_GENERIC
#define FORCE #define FORCE
#define FORCE_INTEL #define FORCE_INTEL


+ 17
- 0
kernel/setparam-ref.c View File

@@ -941,6 +941,23 @@ static void init_parameter(void) {
#endif #endif
#endif #endif


/*
 * Steamroller section of init_parameter(): compiled only when STEAMROLLER is
 * defined. It copies the compile-time *GEMM_DEFAULT_P constants into the
 * matching *gemm_p fields of TABLE_NAME. No runtime probing is done here;
 * the values are whatever the *_DEFAULT_P macros expand to.
 */
#ifdef STEAMROLLER

#ifdef DEBUG
/* Debug builds announce which target section was compiled in. */
fprintf(stderr, "Steamroller\n");
#endif

TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
/* The q/x entries are set only when EXPRECISION is defined. */
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif


#ifdef NANO #ifdef NANO


#ifdef DEBUG #ifdef DEBUG


+ 76
- 0
kernel/x86_64/KERNEL.STEAMROLLER View File

@@ -0,0 +1,76 @@
# Kernel selection table for the STEAMROLLER target (x86_64).
# Each variable names the source file used for one BLAS kernel. No file here
# is Steamroller-specific: the table reuses *_bulldozer.S, *_piledriver.S,
# *_barcelona.S, zgemv_n_dup.S, the shared *_4.c GEMV sources and ../generic/
# C files.

# GEMV kernels: single precision and ZGEMV-T use the shared *_4.c sources,
# double precision uses the Bulldozer assembly.
SGEMVNKERNEL = sgemv_n_4.c
SGEMVTKERNEL = sgemv_t_4.c

ZGEMVNKERNEL = zgemv_n_dup.S
ZGEMVTKERNEL = zgemv_t_4.c

DGEMVNKERNEL = dgemv_n_bulldozer.S
DGEMVTKERNEL = dgemv_t_bulldozer.S

# Double-precision dot product and copy: Bulldozer assembly.
DDOTKERNEL = ddot_bulldozer.S
DCOPYKERNEL = dcopy_bulldozer.S

# SGEMM: Piledriver kernel named 16x2; the "I" copy routines are the generic
# *_16 ones, the "O" copy routines the Bulldozer *_2 ones.
SGEMMKERNEL = sgemm_kernel_16x2_piledriver.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
SGEMMONCOPY = gemm_ncopy_2_bulldozer.S
SGEMMOTCOPY = gemm_tcopy_2_bulldozer.S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

# DGEMM: Piledriver kernel named 8x2 with Bulldozer copy routines.
DGEMMKERNEL = dgemm_kernel_8x2_piledriver.S
DGEMMINCOPY = dgemm_ncopy_8_bulldozer.S
DGEMMITCOPY = dgemm_tcopy_8_bulldozer.S
DGEMMONCOPY = gemm_ncopy_2_bulldozer.S
DGEMMOTCOPY = gemm_tcopy_2_bulldozer.S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

# CGEMM: Piledriver kernel named 4x2 with generic complex copy routines.
CGEMMKERNEL = cgemm_kernel_4x2_piledriver.S
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
# ZGEMM: Piledriver kernel named 2x2. The "I" copy variables are left empty
# on purpose in this table.
# NOTE(review): presumably the "O" copy routines serve both roles because
# the kernel is square (2x2) - confirm against the kernel Makefile.
ZGEMMKERNEL = zgemm_kernel_2x2_piledriver.S
ZGEMMINCOPY =
ZGEMMITCOPY =
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMINCOPYOBJ =
ZGEMMITCOPYOBJ =
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)

# 3M complex GEMM: Barcelona kernels (both variables name zgemm3m_* files).
CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S

# TRSM: generic C kernels everywhere except DTRSM LT/RN, which use the
# Bulldozer 8x2 assembly.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c


DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = dtrsm_kernel_LT_8x2_bulldozer.S
DTRSMKERNEL_RN = dtrsm_kernel_RN_8x2_bulldozer.S
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c



+ 1
- 1
kernel/x86_64/ddot.c View File

@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(BULLDOZER) || defined(PILEDRIVER)
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
#include "ddot_microk_bulldozer-2.c" #include "ddot_microk_bulldozer-2.c"
#elif defined(NEHALEM) #elif defined(NEHALEM)
#include "ddot_microk_nehalem-2.c" #include "ddot_microk_nehalem-2.c"


+ 1
- 1
kernel/x86_64/sdot.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(BULLDOZER) || defined(PILEDRIVER)
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
#include "sdot_microk_bulldozer-2.c" #include "sdot_microk_bulldozer-2.c"
#elif defined(NEHALEM) #elif defined(NEHALEM)
#include "sdot_microk_nehalem-2.c" #include "sdot_microk_nehalem-2.c"


+ 1
- 1
kernel/x86_64/sgemv_n_4.c View File

@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(BULLDOZER) || defined(PILEDRIVER)
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
#include "sgemv_n_microk_bulldozer-4.c" #include "sgemv_n_microk_bulldozer-4.c"
#elif defined(NEHALEM) #elif defined(NEHALEM)
#include "sgemv_n_microk_nehalem-4.c" #include "sgemv_n_microk_nehalem-4.c"


+ 1
- 1
kernel/x86_64/sgemv_t_4.c View File

@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#if defined(NEHALEM) #if defined(NEHALEM)
#include "sgemv_t_microk_nehalem-4.c" #include "sgemv_t_microk_nehalem-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER)
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
#include "sgemv_t_microk_bulldozer-4.c" #include "sgemv_t_microk_bulldozer-4.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "sgemv_t_microk_sandy-4.c" #include "sgemv_t_microk_sandy-4.c"


+ 1
- 1
kernel/x86_64/zgemv_t_4.c View File

@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(BULLDOZER) || defined(PILEDRIVER)
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
#include "zgemv_t_microk_bulldozer-4.c" #include "zgemv_t_microk_bulldozer-4.c"
#elif defined(HASWELL) #elif defined(HASWELL)
#include "zgemv_t_microk_haswell-4.c" #include "zgemv_t_microk_haswell-4.c"


+ 93
- 0
param.h View File

@@ -406,6 +406,99 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


/*
 * Compile-time tuning constants for the STEAMROLLER target.
 * All values are plain macros; several differ between ARCH_X86 and the
 * other (ARCH_X86_64) configuration.
 */
#ifdef STEAMROLLER
#define SNUMOPT 8
#define DNUMOPT 4

/* Default GEMM offset and alignment constants. */
#define GEMM_DEFAULT_OFFSET_A 64
#define GEMM_DEFAULT_OFFSET_B 832
#define GEMM_DEFAULT_ALIGN 0x0fffUL



/* N unroll factors that are the same for both architectures. */
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

/*
 * Architecture-dependent unroll factors.
 * In the non-ARCH_X86 branch the M x N pairs are 16x2 (S), 8x2 (D), 4x2 (C)
 * and 2x2 (Z), the same shapes that appear in the *_piledriver.S kernel file
 * names selected in kernel/x86_64/KERNEL.STEAMROLLER.
 * NOTE(review): the CGEMM3M/ZGEMM3M unroll factors and GEMV_UNROLL are
 * defined only in the non-ARCH_X86 branch - confirm ARCH_X86 builds obtain
 * them elsewhere or do not need them.
 */
#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#else
#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define SGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_M 8
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1
#define CGEMM3M_DEFAULT_UNROLL_N 4
#define CGEMM3M_DEFAULT_UNROLL_M 8
#define ZGEMM3M_DEFAULT_UNROLL_N 4
#define ZGEMM3M_DEFAULT_UNROLL_M 4
#define GEMV_UNROLL 8
#endif

/* *GEMM_DEFAULT_P: S/D/Z/C depend on ARCH_X86_64; Q/X are shared. */
#if defined(ARCH_X86_64)
#define SGEMM_DEFAULT_P 768
#define DGEMM_DEFAULT_P 768
#define ZGEMM_DEFAULT_P 384
#define CGEMM_DEFAULT_P 768
#else
#define SGEMM_DEFAULT_P 448
#define DGEMM_DEFAULT_P 480
#define ZGEMM_DEFAULT_P 112
#define CGEMM_DEFAULT_P 224
#endif
#define QGEMM_DEFAULT_P 112
#define XGEMM_DEFAULT_P 56

/* *GEMM_DEFAULT_Q: S/D/Z/C depend on ARCH_X86_64; Q/X are shared. */
#if defined(ARCH_X86_64)
#define SGEMM_DEFAULT_Q 192
#define DGEMM_DEFAULT_Q 168
#define ZGEMM_DEFAULT_Q 168
#define CGEMM_DEFAULT_Q 168
#else
#define SGEMM_DEFAULT_Q 224
#define DGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 224
#define CGEMM_DEFAULT_Q 224
#endif
#define QGEMM_DEFAULT_Q 224
#define XGEMM_DEFAULT_Q 224

/* 3M complex GEMM P/Q/R defaults; identical for both architectures. */
#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288

/*
 * *GEMM_DEFAULT_R: S and D are the literal 12288, while Q/C/Z/X expand to
 * the identifiers qgemm_r, cgemm_r, zgemm_r and xgemm_r, which are declared
 * outside this block (presumably runtime variables - TODO confirm).
 */
#define SGEMM_DEFAULT_R 12288
#define QGEMM_DEFAULT_R qgemm_r
#define DGEMM_DEFAULT_R 12288
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r

#define SYMV_P 16
#define HAVE_EXCLUSIVE_CACHE

/* GEMM_THREAD expands to gemm_thread_mn, which is defined elsewhere. */
#define GEMM_THREAD gemm_thread_mn

#endif


#ifdef ATHLON #ifdef ATHLON


#define SNUMOPT 4 #define SNUMOPT 4


Loading…
Cancel
Save