Enable DYNAMIC_ARCH feature on ARM64. This patch uses the cpuid feature in linux kernel to detect the core type at runtime (https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt). If this feature is missing in kernel, then the user should use the OPENBLAS_CORETYPE env variable to select the desired core type.tags/v0.3.4
| @@ -510,6 +510,13 @@ CCOMMON_OPT += $(XCCOMMON_OPT) | |||
| #CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)' | |||
| endif | |||
| ifeq ($(ARCH), arm64) | |||
| DYNAMIC_CORE = ARMV8 | |||
| DYNAMIC_CORE += CORTEXA57 | |||
| DYNAMIC_CORE += THUNDERX | |||
| DYNAMIC_CORE += THUNDERX2T99 | |||
| endif | |||
| # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty | |||
| ifndef DYNAMIC_CORE | |||
| override DYNAMIC_ARCH= | |||
| @@ -15,7 +15,11 @@ endif | |||
| # COMMONOBJS += info.$(SUFFIX) | |||
| ifeq ($(DYNAMIC_ARCH), 1) | |||
| ifeq ($(ARCH),arm64) | |||
| COMMONOBJS += dynamic_arm64.$(SUFFIX) | |||
| else | |||
| COMMONOBJS += dynamic.$(SUFFIX) | |||
| endif | |||
| else | |||
| COMMONOBJS += parameter.$(SUFFIX) | |||
| endif | |||
| @@ -71,7 +75,11 @@ BLAS_SERVER = blas_server.c | |||
| endif | |||
| ifeq ($(DYNAMIC_ARCH), 1) | |||
| ifeq ($(ARCH),arm64) | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX) | |||
| else | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | |||
| endif | |||
| else | |||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | |||
| endif | |||
| @@ -0,0 +1,198 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| #include "common.h" | |||
| #include <asm/hwcap.h> | |||
| #include <sys/auxv.h> | |||
| extern gotoblas_t gotoblas_ARMV8; | |||
| extern gotoblas_t gotoblas_CORTEXA57; | |||
| extern gotoblas_t gotoblas_THUNDERX; | |||
| extern gotoblas_t gotoblas_THUNDERX2T99; | |||
| extern void openblas_warning(int verbose, const char * msg); | |||
| #define NUM_CORETYPES 4 | |||
| /* | |||
| * In case asm/hwcap.h is outdated on the build system, make sure | |||
| * that HWCAP_CPUID is defined | |||
| */ | |||
| #ifndef HWCAP_CPUID | |||
| #define HWCAP_CPUID (1 << 11) | |||
| #endif | |||
| #define get_cpu_ftr(id, var) ({ \ | |||
| asm("mrs %0, "#id : "=r" (var)); \ | |||
| }) | |||
| static char *corename[] = { | |||
| "armv8", | |||
| "cortexa57", | |||
| "thunderx", | |||
| "thunderx2t99", | |||
| "unknown" | |||
| }; | |||
| char *gotoblas_corename(void) { | |||
| if (gotoblas == &gotoblas_ARMV8) return corename[ 0]; | |||
| if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1]; | |||
| if (gotoblas == &gotoblas_THUNDERX) return corename[ 2]; | |||
| if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3]; | |||
| return corename[NUM_CORETYPES]; | |||
| } | |||
| static gotoblas_t *force_coretype(char *coretype) { | |||
| int i ; | |||
| int found = -1; | |||
| char message[128]; | |||
| for ( i=0 ; i < NUM_CORETYPES; i++) | |||
| { | |||
| if (!strncasecmp(coretype, corename[i], 20)) | |||
| { | |||
| found = i; | |||
| break; | |||
| } | |||
| } | |||
| switch (found) | |||
| { | |||
| case 0: return (&gotoblas_ARMV8); | |||
| case 1: return (&gotoblas_CORTEXA57); | |||
| case 2: return (&gotoblas_THUNDERX); | |||
| case 3: return (&gotoblas_THUNDERX2T99); | |||
| } | |||
| snprintf(message, 128, "Core not found: %s\n", coretype); | |||
| openblas_warning(1, message); | |||
| return NULL; | |||
| } | |||
| static gotoblas_t *get_coretype(void) { | |||
| int implementer, variant, part, arch, revision, midr_el1; | |||
| if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) { | |||
| char coremsg[128]; | |||
| snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n"); | |||
| openblas_warning(1, coremsg); | |||
| return NULL; | |||
| } | |||
| get_cpu_ftr(MIDR_EL1, midr_el1); | |||
| /* | |||
| * MIDR_EL1 | |||
| * | |||
| * 31 24 23 20 19 16 15 4 3 0 | |||
| * ----------------------------------------------------------------- | |||
| * | Implementer | Variant | Architecture | Part Number | Revision | | |||
| * ----------------------------------------------------------------- | |||
| */ | |||
| implementer = (midr_el1 >> 24) & 0xFF; | |||
| part = (midr_el1 >> 4) & 0xFFF; | |||
| switch(implementer) | |||
| { | |||
| case 0x41: // ARM | |||
| switch (part) | |||
| { | |||
| case 0xd07: // Cortex A57 | |||
| case 0xd08: // Cortex A72 | |||
| case 0xd03: // Cortex A53 | |||
| return &gotoblas_CORTEXA57; | |||
| } | |||
| break; | |||
| case 0x42: // Broadcom | |||
| switch (part) | |||
| { | |||
| case 0x516: // Vulcan | |||
| return &gotoblas_THUNDERX2T99; | |||
| } | |||
| break; | |||
| case 0x43: // Cavium | |||
| switch (part) | |||
| { | |||
| case 0x0a1: // ThunderX | |||
| return &gotoblas_THUNDERX; | |||
| case 0x0af: // ThunderX2 | |||
| return &gotoblas_THUNDERX2T99; | |||
| } | |||
| break; | |||
| } | |||
| return NULL; | |||
| } | |||
| void gotoblas_dynamic_init(void) { | |||
| char coremsg[128]; | |||
| char coren[22]; | |||
| char *p; | |||
| if (gotoblas) return; | |||
| p = getenv("OPENBLAS_CORETYPE"); | |||
| if ( p ) | |||
| { | |||
| gotoblas = force_coretype(p); | |||
| } | |||
| else | |||
| { | |||
| gotoblas = get_coretype(); | |||
| } | |||
| if (gotoblas == NULL) | |||
| { | |||
| snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n"); | |||
| openblas_warning(1, coremsg); | |||
| gotoblas = &gotoblas_ARMV8; | |||
| } | |||
| if (gotoblas && gotoblas->init) { | |||
| strncpy(coren, gotoblas_corename(), 20); | |||
| sprintf(coremsg, "Core: %s\n", coren); | |||
| openblas_warning(2, coremsg); | |||
| gotoblas -> init(); | |||
| } else { | |||
| openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); | |||
| exit(1); | |||
| } | |||
| } | |||
| void gotoblas_dynamic_quit(void) { | |||
| gotoblas = NULL; | |||
| } | |||
| @@ -113,13 +113,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
| DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
| @@ -134,8 +134,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||
| DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||
| endif | |||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(DGEMM_UNROLL_N), 4) | |||
| @@ -146,34 +146,34 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||
| endif | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
| CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
| ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||
| CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||
| CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
| ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
| ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy.o | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | |||
| DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | |||
| @@ -201,25 +201,25 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| @@ -111,13 +111,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
| DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
| @@ -132,8 +132,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||
| DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||
| endif | |||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(DGEMM_UNROLL_N), 4) | |||
| @@ -144,32 +144,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||
| endif | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
| CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
| ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||
| CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||
| CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
| ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
| ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy.o | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| @@ -89,26 +89,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| SGEMMKERNEL = sgemm_kernel_4x4.S | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| @@ -74,13 +74,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
| @@ -94,8 +94,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||
| DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||
| endif | |||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(DGEMM_UNROLL_N), 4) | |||
| @@ -106,32 +106,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||
| endif | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
| ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||
| CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||
| CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
| ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||
| ZGEMMINCOPYOBJ = zgemm_incopy.o | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| SASUMKERNEL = sasum_thunderx2t99.c | |||
| DASUMKERNEL = dasum_thunderx2t99.c | |||
| @@ -646,6 +646,78 @@ gotoblas_t TABLE_NAME = { | |||
| }; | |||
| #if defined(ARCH_ARM64) | |||
| static void init_parameter(void) { | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
| TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
| TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
| TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
| TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
| TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
| TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||
| TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q; | |||
| TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q; | |||
| TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R; | |||
| TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R; | |||
| #endif | |||
| #if defined(USE_GEMM3M) | |||
| #ifdef CGEMM3M_DEFAULT_P | |||
| TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; | |||
| #else | |||
| TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p; | |||
| #endif | |||
| #ifdef ZGEMM3M_DEFAULT_P | |||
| TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P; | |||
| #else | |||
| TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p; | |||
| #endif | |||
| #ifdef CGEMM3M_DEFAULT_Q | |||
| TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q; | |||
| #else | |||
| TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q; | |||
| #endif | |||
| #ifdef ZGEMM3M_DEFAULT_Q | |||
| TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q; | |||
| #else | |||
| TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q; | |||
| #endif | |||
| #ifdef CGEMM3M_DEFAULT_R | |||
| TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R; | |||
| #else | |||
| TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r; | |||
| #endif | |||
| #ifdef ZGEMM3M_DEFAULT_R | |||
| TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R; | |||
| #else | |||
| TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p; | |||
| TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q; | |||
| TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r; | |||
| #endif | |||
| #endif | |||
| } | |||
| #else // defined(ARCH_ARM64) | |||
| #ifdef ARCH_X86 | |||
| static int get_l2_size_old(void){ | |||
| int i, eax, ebx, ecx, edx, cpuid_level; | |||
| @@ -1231,3 +1303,4 @@ static void init_parameter(void) { | |||
| } | |||
| #endif //defined(ARCH_ARM64) | |||