Add DYNAMIC_ARCH support for ARM64 (tags/v0.3.4)
| @@ -510,6 +510,13 @@ CCOMMON_OPT += $(XCCOMMON_OPT) | |||||
| #CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)' | #CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)' | ||||
| endif | endif | ||||
| ifeq ($(ARCH), arm64) | |||||
| DYNAMIC_CORE = ARMV8 | |||||
| DYNAMIC_CORE += CORTEXA57 | |||||
| DYNAMIC_CORE += THUNDERX | |||||
| DYNAMIC_CORE += THUNDERX2T99 | |||||
| endif | |||||
| # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty | # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty | ||||
| ifndef DYNAMIC_CORE | ifndef DYNAMIC_CORE | ||||
| override DYNAMIC_ARCH= | override DYNAMIC_ARCH= | ||||
| @@ -237,7 +237,6 @@ void get_cpuconfig(void) | |||||
| break; | break; | ||||
| case CPU_THUNDERX: | case CPU_THUNDERX: | ||||
| printf("#define ARMV8\n"); | |||||
| printf("#define THUNDERX\n"); | printf("#define THUNDERX\n"); | ||||
| printf("#define L1_DATA_SIZE 32768\n"); | printf("#define L1_DATA_SIZE 32768\n"); | ||||
| printf("#define L1_DATA_LINESIZE 128\n"); | printf("#define L1_DATA_LINESIZE 128\n"); | ||||
| @@ -15,7 +15,11 @@ endif | |||||
| # COMMONOBJS += info.$(SUFFIX) | # COMMONOBJS += info.$(SUFFIX) | ||||
| ifeq ($(DYNAMIC_ARCH), 1) | ifeq ($(DYNAMIC_ARCH), 1) | ||||
| ifeq ($(ARCH),arm64) | |||||
| COMMONOBJS += dynamic_arm64.$(SUFFIX) | |||||
| else | |||||
| COMMONOBJS += dynamic.$(SUFFIX) | COMMONOBJS += dynamic.$(SUFFIX) | ||||
| endif | |||||
| else | else | ||||
| COMMONOBJS += parameter.$(SUFFIX) | COMMONOBJS += parameter.$(SUFFIX) | ||||
| endif | endif | ||||
| @@ -71,7 +75,11 @@ BLAS_SERVER = blas_server.c | |||||
| endif | endif | ||||
| ifeq ($(DYNAMIC_ARCH), 1) | ifeq ($(DYNAMIC_ARCH), 1) | ||||
| ifeq ($(ARCH),arm64) | |||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX) | |||||
| else | |||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | ||||
| endif | |||||
| else | else | ||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | ||||
| endif | endif | ||||
| @@ -0,0 +1,198 @@ | |||||
| /*********************************************************************/ | |||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
| /* All rights reserved. */ | |||||
| /* */ | |||||
| /* Redistribution and use in source and binary forms, with or */ | |||||
| /* without modification, are permitted provided that the following */ | |||||
| /* conditions are met: */ | |||||
| /* */ | |||||
| /* 1. Redistributions of source code must retain the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer. */ | |||||
| /* */ | |||||
| /* 2. Redistributions in binary form must reproduce the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer in the documentation and/or other materials */ | |||||
| /* provided with the distribution. */ | |||||
| /* */ | |||||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||||
| /* */ | |||||
| /* The views and conclusions contained in the software and */ | |||||
| /* documentation are those of the authors and should not be */ | |||||
| /* interpreted as representing official policies, either expressed */ | |||||
| /* or implied, of The University of Texas at Austin. */ | |||||
| /*********************************************************************/ | |||||
| #include "common.h" | |||||
| #include <asm/hwcap.h> | |||||
| #include <sys/auxv.h> | |||||
| extern gotoblas_t gotoblas_ARMV8; | |||||
| extern gotoblas_t gotoblas_CORTEXA57; | |||||
| extern gotoblas_t gotoblas_THUNDERX; | |||||
| extern gotoblas_t gotoblas_THUNDERX2T99; | |||||
| extern void openblas_warning(int verbose, const char * msg); | |||||
| #define NUM_CORETYPES 4 | |||||
| /* | |||||
| * In case asm/hwcap.h is outdated on the build system, make sure | |||||
| * that HWCAP_CPUID is defined | |||||
| */ | |||||
| #ifndef HWCAP_CPUID | |||||
| #define HWCAP_CPUID (1 << 11) | |||||
| #endif | |||||
/*
 * Read the system register named `id` into `var` using an MRS instruction.
 * `id` is pasted verbatim into the assembler text, so it must be a register
 * name the assembler understands (e.g. MIDR_EL1).
 */
#define get_cpu_ftr(id, var) ({                     \
    __asm__("mrs %0, "#id : "=r" (var));            \
  })
/*
 * Printable names of the supported cores. The first four entries are matched
 * against user input in force_coretype(); the final entry is the name
 * reported when the active table is none of the known ones.
 */
static char *corename[] = {
  "armv8",         /* 0 */
  "cortexa57",     /* 1 */
  "thunderx",      /* 2 */
  "thunderx2t99",  /* 3 */
  "unknown"        /* 4: sentinel, indexed by NUM_CORETYPES */
};
| char *gotoblas_corename(void) { | |||||
| if (gotoblas == &gotoblas_ARMV8) return corename[ 0]; | |||||
| if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1]; | |||||
| if (gotoblas == &gotoblas_THUNDERX) return corename[ 2]; | |||||
| if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3]; | |||||
| return corename[NUM_CORETYPES]; | |||||
| } | |||||
| static gotoblas_t *force_coretype(char *coretype) { | |||||
| int i ; | |||||
| int found = -1; | |||||
| char message[128]; | |||||
| for ( i=0 ; i < NUM_CORETYPES; i++) | |||||
| { | |||||
| if (!strncasecmp(coretype, corename[i], 20)) | |||||
| { | |||||
| found = i; | |||||
| break; | |||||
| } | |||||
| } | |||||
| switch (found) | |||||
| { | |||||
| case 0: return (&gotoblas_ARMV8); | |||||
| case 1: return (&gotoblas_CORTEXA57); | |||||
| case 2: return (&gotoblas_THUNDERX); | |||||
| case 3: return (&gotoblas_THUNDERX2T99); | |||||
| } | |||||
| snprintf(message, 128, "Core not found: %s\n", coretype); | |||||
| openblas_warning(1, message); | |||||
| return NULL; | |||||
| } | |||||
| static gotoblas_t *get_coretype(void) { | |||||
| int implementer, variant, part, arch, revision, midr_el1; | |||||
| if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) { | |||||
| char coremsg[128]; | |||||
| snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n"); | |||||
| openblas_warning(1, coremsg); | |||||
| return NULL; | |||||
| } | |||||
| get_cpu_ftr(MIDR_EL1, midr_el1); | |||||
| /* | |||||
| * MIDR_EL1 | |||||
| * | |||||
| * 31 24 23 20 19 16 15 4 3 0 | |||||
| * ----------------------------------------------------------------- | |||||
| * | Implementer | Variant | Architecture | Part Number | Revision | | |||||
| * ----------------------------------------------------------------- | |||||
| */ | |||||
| implementer = (midr_el1 >> 24) & 0xFF; | |||||
| part = (midr_el1 >> 4) & 0xFFF; | |||||
| switch(implementer) | |||||
| { | |||||
| case 0x41: // ARM | |||||
| switch (part) | |||||
| { | |||||
| case 0xd07: // Cortex A57 | |||||
| case 0xd08: // Cortex A72 | |||||
| case 0xd03: // Cortex A53 | |||||
| return &gotoblas_CORTEXA57; | |||||
| } | |||||
| break; | |||||
| case 0x42: // Broadcom | |||||
| switch (part) | |||||
| { | |||||
| case 0x516: // Vulcan | |||||
| return &gotoblas_THUNDERX2T99; | |||||
| } | |||||
| break; | |||||
| case 0x43: // Cavium | |||||
| switch (part) | |||||
| { | |||||
| case 0x0a1: // ThunderX | |||||
| return &gotoblas_THUNDERX; | |||||
| case 0x0af: // ThunderX2 | |||||
| return &gotoblas_THUNDERX2T99; | |||||
| } | |||||
| break; | |||||
| } | |||||
| return NULL; | |||||
| } | |||||
| void gotoblas_dynamic_init(void) { | |||||
| char coremsg[128]; | |||||
| char coren[22]; | |||||
| char *p; | |||||
| if (gotoblas) return; | |||||
| p = getenv("OPENBLAS_CORETYPE"); | |||||
| if ( p ) | |||||
| { | |||||
| gotoblas = force_coretype(p); | |||||
| } | |||||
| else | |||||
| { | |||||
| gotoblas = get_coretype(); | |||||
| } | |||||
| if (gotoblas == NULL) | |||||
| { | |||||
| snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n"); | |||||
| openblas_warning(1, coremsg); | |||||
| gotoblas = &gotoblas_ARMV8; | |||||
| } | |||||
| if (gotoblas && gotoblas->init) { | |||||
| strncpy(coren, gotoblas_corename(), 20); | |||||
| sprintf(coremsg, "Core: %s\n", coren); | |||||
| openblas_warning(2, coremsg); | |||||
| gotoblas -> init(); | |||||
| } else { | |||||
| openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); | |||||
| exit(1); | |||||
| } | |||||
| } | |||||
| void gotoblas_dynamic_quit(void) { | |||||
| gotoblas = NULL; | |||||
| } | |||||
| @@ -730,35 +730,8 @@ void blas_set_parameter(void){ | |||||
| #if defined(ARCH_ARM64) | #if defined(ARCH_ARM64) | ||||
| #if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8) | |||||
| unsigned long dgemm_prefetch_size_a; | |||||
| unsigned long dgemm_prefetch_size_b; | |||||
| unsigned long dgemm_prefetch_size_c; | |||||
| #endif | |||||
| void blas_set_parameter(void) | void blas_set_parameter(void) | ||||
| { | { | ||||
| #if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8) | |||||
| dgemm_p = 160; | |||||
| dgemm_q = 128; | |||||
| dgemm_r = 4096; | |||||
| sgemm_p = 128; | |||||
| sgemm_q = 352; | |||||
| sgemm_r = 4096; | |||||
| cgemm_p = 128; | |||||
| cgemm_q = 224; | |||||
| cgemm_r = 4096; | |||||
| zgemm_p = 128; | |||||
| zgemm_q = 112; | |||||
| zgemm_r = 4096; | |||||
| dgemm_prefetch_size_a = 3584; | |||||
| dgemm_prefetch_size_b = 512; | |||||
| dgemm_prefetch_size_c = 128; | |||||
| #endif | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -88,7 +88,11 @@ lsame.$(SUFFIX): $(KERNELDIR)/$(LSAME_KERNEL) | |||||
| $(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F) | $(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F) | ||||
| setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h | setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h | ||||
| ifeq ($(USE_GEMM3M), 1) | |||||
| $(CC) -c $(CFLAGS) -DUSE_GEMM3M $< -o $@ | |||||
| else | |||||
| $(CC) -c $(CFLAGS) $< -o $@ | $(CC) -c $(CFLAGS) $< -o $@ | ||||
| endif | |||||
| setparam$(TSUFFIX).c : setparam-ref.c | setparam$(TSUFFIX).c : setparam-ref.c | ||||
| sed 's/TS/$(TSUFFIX)/g' $< > $(@F) | sed 's/TS/$(TSUFFIX)/g' $< > $(@F) | ||||
| @@ -113,13 +113,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ||||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | ||||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | ||||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | ||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | ||||
| DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | ||||
| @@ -134,8 +134,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||||
| DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | ||||
| endif | endif | ||||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| ifeq ($(DGEMM_UNROLL_N), 4) | ifeq ($(DGEMM_UNROLL_N), 4) | ||||
| @@ -146,34 +146,34 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | ||||
| endif | endif | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | ||||
| CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | ||||
| ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | ||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | ||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | ||||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | ||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | ||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ||||
| ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ||||
| ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | ||||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | ||||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | ||||
| ZGEMMINCOPYOBJ = zgemm_incopy.o | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | ||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | ||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | ||||
| DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | ||||
| @@ -201,25 +201,25 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | SGEMMKERNEL = ../generic/gemmkernel_2x2.c | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | SGEMMONCOPY = ../generic/gemm_ncopy_2.c | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | ||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | DGEMMKERNEL = ../generic/gemmkernel_2x2.c | ||||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | DGEMMONCOPY = ../generic/gemm_ncopy_2.c | ||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| @@ -111,13 +111,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ||||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | ||||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | ||||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | ||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | ||||
| DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | ||||
| @@ -132,8 +132,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||||
| DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | ||||
| endif | endif | ||||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| ifeq ($(DGEMM_UNROLL_N), 4) | ifeq ($(DGEMM_UNROLL_N), 4) | ||||
| @@ -144,32 +144,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | ||||
| endif | endif | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | ||||
| CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | ||||
| ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | ||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | ||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | ||||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | ||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | ||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ||||
| ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ||||
| ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | ||||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | ||||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | ||||
| ZGEMMINCOPYOBJ = zgemm_incopy.o | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | ||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | ||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| @@ -89,26 +89,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| SGEMMKERNEL = sgemm_kernel_4x4.S | SGEMMKERNEL = sgemm_kernel_4x4.S | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | SGEMMONCOPY = ../generic/gemm_ncopy_4.c | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | ||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | DGEMMKERNEL = ../generic/gemmkernel_2x2.c | ||||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | DGEMMONCOPY = ../generic/gemm_ncopy_2.c | ||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | ||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | ||||
| @@ -74,13 +74,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ||||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | ||||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | ||||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | ||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | ||||
| @@ -94,8 +94,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||||
| DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | ||||
| endif | endif | ||||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| ifeq ($(DGEMM_UNROLL_N), 4) | ifeq ($(DGEMM_UNROLL_N), 4) | ||||
| @@ -106,32 +106,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | ||||
| endif | endif | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | ||||
| ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | ||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | ||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | ||||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | ||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | ||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ||||
| ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | ||||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | ||||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | ||||
| ZGEMMINCOPYOBJ = zgemm_incopy.o | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | ||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | ||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| SASUMKERNEL = sasum_thunderx2t99.c | SASUMKERNEL = sasum_thunderx2t99.c | ||||
| DASUMKERNEL = dasum_thunderx2t99.c | DASUMKERNEL = dasum_thunderx2t99.c | ||||
| @@ -1,135 +0,0 @@ | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| SAXPYKERNEL = axpy.S | |||||
| DAXPYKERNEL = axpy.S | |||||
| CAXPYKERNEL = zaxpy.S | |||||
| ZAXPYKERNEL = zaxpy.S | |||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SDOTKERNEL = dot.S | |||||
| DDOTKERNEL = dot.S | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| DSDOTKERNEL = dot.S | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| SROTKERNEL = rot.S | |||||
| DROTKERNEL = rot.S | |||||
| CROTKERNEL = zrot.S | |||||
| ZROTKERNEL = zrot.S | |||||
| SSCALKERNEL = scal.S | |||||
| DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | |||||
| ZSCALKERNEL = zscal.S | |||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| SGEMVNKERNEL = gemv_n.S | |||||
| DGEMVNKERNEL = gemv_n.S | |||||
| CGEMVNKERNEL = zgemv_n.S | |||||
| ZGEMVNKERNEL = zgemv_n.S | |||||
| SGEMVTKERNEL = gemv_t.S | |||||
| DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | |||||
| ZGEMVTKERNEL = zgemv_t.S | |||||
| STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| SGEMMKERNEL = sgemm_kernel_4x4.S | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| @@ -943,13 +943,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| prfm PLDL1KEEP, [origPB] | prfm PLDL1KEEP, [origPB] | ||||
| prfm PLDL1KEEP, [origPA] | prfm PLDL1KEEP, [origPA] | ||||
| ldr A_PRE_SIZE, =dgemm_prefetch_size_a | |||||
| ldr A_PRE_SIZE, [A_PRE_SIZE] | |||||
| ldr B_PRE_SIZE, =dgemm_prefetch_size_b | |||||
| ldr B_PRE_SIZE, [B_PRE_SIZE] | |||||
| ldr C_PRE_SIZE, =dgemm_prefetch_size_c | |||||
| ldr C_PRE_SIZE, [C_PRE_SIZE] | |||||
| mov A_PRE_SIZE, #3584 | |||||
| mov B_PRE_SIZE, #512 | |||||
| mov C_PRE_SIZE, #128 | |||||
| add A_PRE_SIZE_64, A_PRE_SIZE, #64 | add A_PRE_SIZE_64, A_PRE_SIZE, #64 | ||||
| add B_PRE_SIZE_64, B_PRE_SIZE, #64 | add B_PRE_SIZE_64, B_PRE_SIZE, #64 | ||||
| @@ -294,6 +294,8 @@ gotoblas_t TABLE_NAME = { | |||||
| chemm_outcopyTS, chemm_oltcopyTS, | chemm_outcopyTS, chemm_oltcopyTS, | ||||
| 0, 0, 0, | 0, 0, 0, | ||||
| #if defined(USE_GEMM3M) | |||||
| #ifdef CGEMM3M_DEFAULT_UNROLL_M | #ifdef CGEMM3M_DEFAULT_UNROLL_M | ||||
| CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N), | CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N), | ||||
| #else | #else | ||||
| @@ -324,6 +326,33 @@ gotoblas_t TABLE_NAME = { | |||||
| chemm3m_oucopybTS, chemm3m_olcopybTS, | chemm3m_oucopybTS, chemm3m_olcopybTS, | ||||
| chemm3m_oucopyrTS, chemm3m_olcopyrTS, | chemm3m_oucopyrTS, chemm3m_olcopyrTS, | ||||
| chemm3m_oucopyiTS, chemm3m_olcopyiTS, | chemm3m_oucopyiTS, chemm3m_olcopyiTS, | ||||
| #else | |||||
| 0, 0, 0, | |||||
| NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| #endif | |||||
| #ifndef NO_LAPACK | #ifndef NO_LAPACK | ||||
| cneg_tcopyTS, claswp_ncopyTS, | cneg_tcopyTS, claswp_ncopyTS, | ||||
| @@ -400,6 +429,7 @@ gotoblas_t TABLE_NAME = { | |||||
| zhemm_outcopyTS, zhemm_oltcopyTS, | zhemm_outcopyTS, zhemm_oltcopyTS, | ||||
| 0, 0, 0, | 0, 0, 0, | ||||
| #if defined(USE_GEMM3M) | |||||
| #ifdef ZGEMM3M_DEFAULT_UNROLL_M | #ifdef ZGEMM3M_DEFAULT_UNROLL_M | ||||
| ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N), | ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N), | ||||
| #else | #else | ||||
| @@ -430,6 +460,33 @@ gotoblas_t TABLE_NAME = { | |||||
| zhemm3m_oucopybTS, zhemm3m_olcopybTS, | zhemm3m_oucopybTS, zhemm3m_olcopybTS, | ||||
| zhemm3m_oucopyrTS, zhemm3m_olcopyrTS, | zhemm3m_oucopyrTS, zhemm3m_olcopyrTS, | ||||
| zhemm3m_oucopyiTS, zhemm3m_olcopyiTS, | zhemm3m_oucopyiTS, zhemm3m_olcopyiTS, | ||||
| #else | |||||
| 0, 0, 0, | |||||
| NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| #endif | |||||
| #ifndef NO_LAPACK | #ifndef NO_LAPACK | ||||
| zneg_tcopyTS, zlaswp_ncopyTS, | zneg_tcopyTS, zlaswp_ncopyTS, | ||||
| @@ -503,6 +560,7 @@ gotoblas_t TABLE_NAME = { | |||||
| xhemm_outcopyTS, xhemm_oltcopyTS, | xhemm_outcopyTS, xhemm_oltcopyTS, | ||||
| 0, 0, 0, | 0, 0, 0, | ||||
| #if defined(USE_GEMM3M) | |||||
| QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N), | QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N), | ||||
| xgemm3m_kernelTS, | xgemm3m_kernelTS, | ||||
| @@ -528,6 +586,33 @@ gotoblas_t TABLE_NAME = { | |||||
| xhemm3m_oucopybTS, xhemm3m_olcopybTS, | xhemm3m_oucopybTS, xhemm3m_olcopybTS, | ||||
| xhemm3m_oucopyrTS, xhemm3m_olcopyrTS, | xhemm3m_oucopyrTS, xhemm3m_olcopyrTS, | ||||
| xhemm3m_oucopyiTS, xhemm3m_olcopyiTS, | xhemm3m_oucopyiTS, xhemm3m_olcopyiTS, | ||||
| #else | |||||
| 0, 0, 0, | |||||
| NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| NULL, NULL, | |||||
| #endif | |||||
| #ifndef NO_LAPACK | #ifndef NO_LAPACK | ||||
| xneg_tcopyTS, xlaswp_ncopyTS, | xneg_tcopyTS, xlaswp_ncopyTS, | ||||
| @@ -561,6 +646,78 @@ gotoblas_t TABLE_NAME = { | |||||
| }; | }; | ||||
| #if defined(ARCH_ARM64) | |||||
| static void init_parameter(void) { /* Variant compiled under ARCH_ARM64: fills the GEMM P/Q/R fields of TABLE_NAME from the *_DEFAULT_* macros. Takes no arguments, returns nothing. */ | |||||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; /* P values for the s/d/c/z GEMM variants */ | |||||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; /* Q values for the s/d/c/z GEMM variants */ | |||||
| TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||||
| TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||||
| TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||||
| TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; /* R values for the s/d/c/z GEMM variants */ | |||||
| TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||||
| TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||||
| TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||||
| #ifdef EXPRECISION /* q/x fields are set only when EXPRECISION is defined (presumably the extended-precision build -- confirm) */ | |||||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||||
| TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q; | |||||
| TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q; | |||||
| TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R; | |||||
| TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R; | |||||
| #endif /* EXPRECISION */ | |||||
| #if defined(USE_GEMM3M) /* 3M fields: use the dedicated *GEMM3M_DEFAULT_* macro when defined, otherwise copy the field assigned above */ | |||||
| #ifdef CGEMM3M_DEFAULT_P | |||||
| TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; | |||||
| #else | |||||
| TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p; /* fallback reads sgemm_p, so it must stay after the sgemm_p assignment */ | |||||
| #endif | |||||
| #ifdef ZGEMM3M_DEFAULT_P | |||||
| TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P; | |||||
| #else | |||||
| TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p; /* fallback: same value as dgemm_p */ | |||||
| #endif | |||||
| #ifdef CGEMM3M_DEFAULT_Q | |||||
| TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q; | |||||
| #else | |||||
| TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q; /* fallback: same value as sgemm_q */ | |||||
| #endif | |||||
| #ifdef ZGEMM3M_DEFAULT_Q | |||||
| TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q; | |||||
| #else | |||||
| TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q; /* fallback: same value as dgemm_q */ | |||||
| #endif | |||||
| #ifdef CGEMM3M_DEFAULT_R | |||||
| TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R; | |||||
| #else | |||||
| TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r; /* fallback: same value as sgemm_r */ | |||||
| #endif | |||||
| #ifdef ZGEMM3M_DEFAULT_R | |||||
| TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R; | |||||
| #else | |||||
| TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r; /* fallback: same value as dgemm_r */ | |||||
| #endif | |||||
| #ifdef EXPRECISION /* xgemm3m has no dedicated macro here; it always copies the qgemm fields set above */ | |||||
| TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p; | |||||
| TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q; | |||||
| TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r; | |||||
| #endif /* EXPRECISION */ | |||||
| #endif /* USE_GEMM3M */ | |||||
| } | |||||
| #else // defined(ARCH_ARM64) | |||||
| #ifdef ARCH_X86 | #ifdef ARCH_X86 | ||||
| static int get_l2_size_old(void){ | static int get_l2_size_old(void){ | ||||
| int i, eax, ebx, ecx, edx, cpuid_level; | int i, eax, ebx, ecx, edx, cpuid_level; | ||||
| @@ -1146,3 +1303,4 @@ static void init_parameter(void) { | |||||
| } | } | ||||
| #endif //defined(ARCH_ARM64) | |||||
| @@ -2641,20 +2641,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 4 | #define ZGEMM_DEFAULT_UNROLL_M 4 | ||||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | #define ZGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define SGEMM_DEFAULT_P sgemm_p | |||||
| #define DGEMM_DEFAULT_P dgemm_p | |||||
| #define CGEMM_DEFAULT_P cgemm_p | |||||
| #define ZGEMM_DEFAULT_P zgemm_p | |||||
| #define SGEMM_DEFAULT_P 128 | |||||
| #define DGEMM_DEFAULT_P 160 | |||||
| #define CGEMM_DEFAULT_P 128 | |||||
| #define ZGEMM_DEFAULT_P 128 | |||||
| #define SGEMM_DEFAULT_Q sgemm_q | |||||
| #define DGEMM_DEFAULT_Q dgemm_q | |||||
| #define CGEMM_DEFAULT_Q cgemm_q | |||||
| #define ZGEMM_DEFAULT_Q zgemm_q | |||||
| #define SGEMM_DEFAULT_Q 352 | |||||
| #define DGEMM_DEFAULT_Q 128 | |||||
| #define CGEMM_DEFAULT_Q 224 | |||||
| #define ZGEMM_DEFAULT_Q 112 | |||||
| #define SGEMM_DEFAULT_R sgemm_r | |||||
| #define DGEMM_DEFAULT_R dgemm_r | |||||
| #define CGEMM_DEFAULT_R cgemm_r | |||||
| #define ZGEMM_DEFAULT_R zgemm_r | |||||
| #define SGEMM_DEFAULT_R 4096 | |||||
| #define DGEMM_DEFAULT_R 4096 | |||||
| #define CGEMM_DEFAULT_R 4096 | |||||
| #define ZGEMM_DEFAULT_R 4096 | |||||
| #define SYMV_P 16 | #define SYMV_P 16 | ||||
| #endif | #endif | ||||
| @@ -2720,20 +2720,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 4 | #define ZGEMM_DEFAULT_UNROLL_M 4 | ||||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | #define ZGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define SGEMM_DEFAULT_P sgemm_p | |||||
| #define DGEMM_DEFAULT_P dgemm_p | |||||
| #define CGEMM_DEFAULT_P cgemm_p | |||||
| #define ZGEMM_DEFAULT_P zgemm_p | |||||
| #define SGEMM_DEFAULT_P 128 | |||||
| #define DGEMM_DEFAULT_P 160 | |||||
| #define CGEMM_DEFAULT_P 128 | |||||
| #define ZGEMM_DEFAULT_P 128 | |||||
| #define SGEMM_DEFAULT_Q sgemm_q | |||||
| #define DGEMM_DEFAULT_Q dgemm_q | |||||
| #define CGEMM_DEFAULT_Q cgemm_q | |||||
| #define ZGEMM_DEFAULT_Q zgemm_q | |||||
| #define SGEMM_DEFAULT_Q 352 | |||||
| #define DGEMM_DEFAULT_Q 128 | |||||
| #define CGEMM_DEFAULT_Q 224 | |||||
| #define ZGEMM_DEFAULT_Q 112 | |||||
| #define SGEMM_DEFAULT_R sgemm_r | |||||
| #define DGEMM_DEFAULT_R dgemm_r | |||||
| #define CGEMM_DEFAULT_R cgemm_r | |||||
| #define ZGEMM_DEFAULT_R zgemm_r | |||||
| #define SGEMM_DEFAULT_R 4096 | |||||
| #define DGEMM_DEFAULT_R 4096 | |||||
| #define CGEMM_DEFAULT_R 4096 | |||||
| #define ZGEMM_DEFAULT_R 4096 | |||||
| #define SYMV_P 16 | #define SYMV_P 16 | ||||
| #endif | #endif | ||||