Add in runtime CPU detection for POWER.tags/v0.3.6^2
| @@ -528,6 +528,12 @@ DYNAMIC_CORE += THUNDERX | |||
| DYNAMIC_CORE += THUNDERX2T99 | |||
| endif | |||
| ifeq ($(ARCH), power) | |||
| DYNAMIC_CORE = POWER6 | |||
| DYNAMIC_CORE += POWER8 | |||
| DYNAMIC_CORE += POWER9 | |||
| endif | |||
| # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty | |||
| ifndef DYNAMIC_CORE | |||
| override DYNAMIC_ARCH= | |||
| @@ -18,8 +18,12 @@ ifeq ($(DYNAMIC_ARCH), 1) | |||
| ifeq ($(ARCH),arm64) | |||
| COMMONOBJS += dynamic_arm64.$(SUFFIX) | |||
| else | |||
| ifeq ($(ARCH),power) | |||
| COMMONOBJS += dynamic_power.$(SUFFIX) | |||
| else | |||
| COMMONOBJS += dynamic.$(SUFFIX) | |||
| endif | |||
| endif | |||
| else | |||
| COMMONOBJS += parameter.$(SUFFIX) | |||
| endif | |||
| @@ -78,8 +82,12 @@ ifeq ($(DYNAMIC_ARCH), 1) | |||
| ifeq ($(ARCH),arm64) | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX) | |||
| else | |||
| ifeq ($(ARCH),power) | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX) | |||
| else | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | |||
| endif | |||
| endif | |||
| else | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | |||
| endif | |||
| @@ -0,0 +1,102 @@ | |||
| #include "common.h" | |||
| extern gotoblas_t gotoblas_POWER6; | |||
| extern gotoblas_t gotoblas_POWER8; | |||
| extern gotoblas_t gotoblas_POWER9; | |||
| extern void openblas_warning(int verbose, const char *msg); | |||
| static char *corename[] = { | |||
| "unknown", | |||
| "POWER6", | |||
| "POWER8", | |||
| "POWER9" | |||
| }; | |||
| #define NUM_CORETYPES 4 | |||
| char *gotoblas_corename(void) { | |||
| if (gotoblas == &gotoblas_POWER6) return corename[1]; | |||
| if (gotoblas == &gotoblas_POWER8) return corename[2]; | |||
| if (gotoblas == &gotoblas_POWER9) return corename[3]; | |||
| return corename[0]; | |||
| } | |||
| static gotoblas_t *get_coretype(void) { | |||
| if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x")) | |||
| return &gotoblas_POWER6; | |||
| if (__builtin_cpu_is("power8")) | |||
| return &gotoblas_POWER8; | |||
| if (__builtin_cpu_is("power9")) | |||
| return &gotoblas_POWER9; | |||
| return NULL; | |||
| } | |||
| static gotoblas_t *force_coretype(char * coretype) { | |||
| int i ; | |||
| int found = -1; | |||
| char message[128]; | |||
| for ( i = 0 ; i < NUM_CORETYPES; i++) | |||
| { | |||
| if (!strncasecmp(coretype, corename[i], 20)) | |||
| { | |||
| found = i; | |||
| break; | |||
| } | |||
| } | |||
| switch (found) | |||
| { | |||
| case 1: return (&gotoblas_POWER6); | |||
| case 2: return (&gotoblas_POWER8); | |||
| case 3: return (&gotoblas_POWER9); | |||
| default: return NULL; | |||
| } | |||
| snprintf(message, 128, "Core not found: %s\n", coretype); | |||
| openblas_warning(1, message); | |||
| } | |||
| void gotoblas_dynamic_init(void) { | |||
| char coremsg[128]; | |||
| char coren[22]; | |||
| char *p; | |||
| if (gotoblas) return; | |||
| p = getenv("OPENBLAS_CORETYPE"); | |||
| if ( p ) | |||
| { | |||
| gotoblas = force_coretype(p); | |||
| } | |||
| else | |||
| { | |||
| gotoblas = get_coretype(); | |||
| } | |||
| if (gotoblas == NULL) | |||
| { | |||
| snprintf(coremsg, 128, "Falling back to POWER8 core\n"); | |||
| openblas_warning(1, coremsg); | |||
| gotoblas = &gotoblas_POWER8; | |||
| } | |||
| if (gotoblas && gotoblas -> init) { | |||
| strncpy(coren,gotoblas_corename(),20); | |||
| sprintf(coremsg, "Core: %s\n",coren); | |||
| openblas_warning(2, coremsg); | |||
| gotoblas -> init(); | |||
| } else { | |||
| openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); | |||
| exit(1); | |||
| } | |||
| } | |||
| void gotoblas_dynamic_quit(void) { | |||
| gotoblas = NULL; | |||
| } | |||
| @@ -13,40 +13,40 @@ SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
| SGEMMITCOPY = sgemm_tcopy_16_power8.S | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_8.c | |||
| SGEMMOTCOPY = sgemm_tcopy_8_power8.S | |||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = dgemm_kernel_16x4_power8.S | |||
| DGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
| DGEMMITCOPY = dgemm_tcopy_16_power8.S | |||
| DGEMMONCOPY = dgemm_ncopy_4_power8.S | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = cgemm_kernel_8x4_power8.S | |||
| CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||
| CGEMMITCOPY = cgemm_tcopy_8_power8.S | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = zgemm_kernel_8x2_power8.S | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||
| ZGEMMITCOPY = zgemm_tcopy_8_power8.S | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| ZGEMMINCOPYOBJ = zgemm_incopy.o | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| @@ -13,40 +13,40 @@ SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
| SGEMMITCOPY = sgemm_tcopy_16_power8.S | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_8.c | |||
| SGEMMOTCOPY = sgemm_tcopy_8_power8.S | |||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = dgemm_kernel_power9.S | |||
| DGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
| DGEMMITCOPY = dgemm_tcopy_16_power8.S | |||
| DGEMMONCOPY = dgemm_ncopy_4_power8.S | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = cgemm_kernel_8x4_power8.S | |||
| CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||
| CGEMMITCOPY = cgemm_tcopy_8_power8.S | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = zgemm_kernel_8x2_power8.S | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||
| ZGEMMITCOPY = zgemm_tcopy_8_power8.S | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| ZGEMMINCOPYOBJ = zgemm_incopy.o | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| @@ -718,6 +718,27 @@ static void init_parameter(void) { | |||
| } | |||
| #else // defined(ARCH_ARM64) | |||
| #if defined(ARCH_POWER) | |||
| static void init_parameter(void) { | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
| TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
| TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||
| TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
| TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
| TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
| TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
| } | |||
| #else //POWER | |||
| #ifdef ARCH_X86 | |||
| static int get_l2_size_old(void){ | |||
| int i, eax, ebx, ecx, edx, cpuid_level; | |||
| @@ -1303,4 +1324,5 @@ static void init_parameter(void) { | |||
| } | |||
| #endif //POWER | |||
| #endif //defined(ARCH_ARM64) | |||