| @@ -41,8 +41,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| foreach (float_type ${FLOAT_TYPES}) | |||
| # a bit of metaprogramming here to pull out the appropriate KERNEL var | |||
| string(SUBSTRING ${float_type} 0 1 float_char) | |||
| if (${float_type} STREQUAL "HALF") | |||
| set (float_char "SH") | |||
| if (${float_type} STREQUAL "BFLOAT16") | |||
| set (float_char "SB") | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type}) | |||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type}) | |||
| @@ -149,8 +149,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) | |||
| foreach (float_type ${FLOAT_TYPES}) | |||
| string(SUBSTRING ${float_type} 0 1 float_char) | |||
| if (${float_type} STREQUAL "HALF") | |||
| set (float_char "SH") | |||
| if (${float_type} STREQUAL "BFLOAT16") | |||
| set (float_char "SB") | |||
| endif () | |||
| if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type}) | |||
| @@ -208,13 +208,13 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPERFORMANT}" "" "gemm_direct_performant" false "" "" false SINGLE) | |||
| endif() | |||
| foreach (float_type SINGLE DOUBLE HALF) | |||
| foreach (float_type SINGLE DOUBLE BFLOAT16) | |||
| string(SUBSTRING ${float_type} 0 1 float_char) | |||
| if (${float_type} STREQUAL "HALF") | |||
| if (NOT ${BUILD_HALF}) | |||
| if (${float_type} STREQUAL "BFLOAT16") | |||
| if (NOT ${BUILD_BFLOAT16}) | |||
| continue () | |||
| else () | |||
| set (float_char "SH") | |||
| set (float_char "SB") | |||
| endif () | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) | |||
| @@ -254,8 +254,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| foreach (float_type ${FLOAT_TYPES}) | |||
| string(SUBSTRING ${float_type} 0 1 float_char) | |||
| if (${float_type} STREQUAL "HALF") | |||
| set (float_char "SH") | |||
| if (${float_type} STREQUAL "BFLOAT16") | |||
| set (float_char "SB") | |||
| endif () | |||
| if (${float_char}GEMMINCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type}) | |||
| @@ -620,8 +620,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| # Makefile.LA | |||
| if(NOT NO_LAPACK) | |||
| foreach (float_type ${FLOAT_TYPES}) | |||
| if (${float_type} STREQUAL "HALF") | |||
| set (float_char "SH") | |||
| if (${float_type} STREQUAL "BFLOAT16") | |||
| set (float_char "SB") | |||
| endif () | |||
| if (NOT DEFINED ${float_char}NEG_TCOPY) | |||
| if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X") | |||
| @@ -688,8 +688,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| foreach (float_type ${FLOAT_TYPES}) | |||
| # a bit of metaprogramming here to pull out the appropriate KERNEL var | |||
| string(SUBSTRING ${float_type} 0 1 float_char) | |||
| if (${float_type} STREQUAL "HALF") | |||
| set (float_char "SH") | |||
| if (${float_type} STREQUAL "BFLOAT16") | |||
| set (float_char "SB") | |||
| endif () | |||
| GenerateNamedObjects("generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false ${float_type}) | |||
| GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type}) | |||
| @@ -262,9 +262,9 @@ ifndef XDOTKERNEL | |||
| XDOTKERNEL = zdot.S | |||
| endif | |||
| ifeq ($(BUILD_HALF),1) | |||
| ifndef SHDOTKERNEL | |||
| SHDOTKERNEL = ../x86_64/shdot.c | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| ifndef SBDOTKERNEL | |||
| SBDOTKERNEL = ../x86_64/sbdot.c | |||
| endif | |||
| ifndef TOBF16KERNEL | |||
| @@ -530,11 +530,11 @@ XBLASOBJS += \ | |||
| xdotc_k$(TSUFFIX).$(SUFFIX) xdotu_k$(TSUFFIX).$(SUFFIX) xnrm2_k$(TSUFFIX).$(SUFFIX) xqrot_k$(TSUFFIX).$(SUFFIX) \ | |||
| xscal_k$(TSUFFIX).$(SUFFIX) xswap_k$(TSUFFIX).$(SUFFIX) xsum_k$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(BUILD_HALF),1) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| SHBLASOBJS += \ | |||
| shdot_k$(TSUFFIX).$(SUFFIX) | |||
| sbdot_k$(TSUFFIX).$(SUFFIX) | |||
| SHEXTOBJS += \ | |||
| shstobf16_k$(TSUFFIX).$(SUFFIX) shdtobf16_k$(TSUFFIX).$(SUFFIX) | |||
| sbstobf16_k$(TSUFFIX).$(SUFFIX) sbdtobf16_k$(TSUFFIX).$(SUFFIX) | |||
| SHEXTOBJS += \ | |||
| sbf16tos_k$(TSUFFIX).$(SUFFIX) dbf16tod_k$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| @@ -757,12 +757,12 @@ $(KDIR)ddot_k$(TSUFFIX).$(SUFFIX) $(KDIR)ddot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL | |||
| $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QDOTKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@ | |||
| ifeq ($(BUILD_HALF),1) | |||
| $(KDIR)shdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)shdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHDOTKERNEL) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| $(KDIR)sbdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sbdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBDOTKERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@ | |||
| $(KDIR)shstobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL) | |||
| $(KDIR)sbstobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL) | |||
| $(CC) -c $(CFLAGS) -UDOUBLE -DSINGLE $< -o $@ | |||
| $(KDIR)shdtobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL) | |||
| $(KDIR)sbdtobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL) | |||
| $(CC) -c $(CFLAGS) -DDOUBLE -USINGLE $< -o $@ | |||
| $(KDIR)sbf16tos_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(BF16TOKERNEL) | |||
| $(CC) -c $(CFLAGS) -UDOUBLE -DSINGLE $< -o $@ | |||
| @@ -80,24 +80,24 @@ SGEMMDIRECTPERFORMANT = sgemm_direct_performant.c | |||
| endif | |||
| endif | |||
| ifeq ($(BUILD_HALF), 1) | |||
| ifndef SHGEMMKERNEL | |||
| SHGEMM_BETA = ../generic/gemm_beta.c | |||
| SHGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SHGEMMINCOPY = ../generic/gemm_ncopy_2.c | |||
| SHGEMMITCOPY = ../generic/gemm_tcopy_2.c | |||
| SHGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| SHGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(BUILD_BFLOAT16), 1) | |||
| ifndef SBGEMMKERNEL | |||
| SBGEMM_BETA = ../generic/gemm_beta.c | |||
| SBGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SBGEMMINCOPY = ../generic/gemm_ncopy_2.c | |||
| SBGEMMITCOPY = ../generic/gemm_tcopy_2.c | |||
| SBGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| SBGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| SHKERNELOBJS += \ | |||
| shgemm_kernel$(TSUFFIX).$(SUFFIX) \ | |||
| $(SHGEMMINCOPYOBJ) $(SHGEMMITCOPYOBJ) \ | |||
| $(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ) | |||
| sbgemm_kernel$(TSUFFIX).$(SUFFIX) \ | |||
| $(SBGEMMINCOPYOBJ) $(SBGEMMITCOPYOBJ) \ | |||
| $(SBGEMMONCOPYOBJ) $(SBGEMMOTCOPYOBJ) | |||
| endif | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" | |||
| @@ -149,7 +149,7 @@ XKERNELOBJS += \ | |||
| $(XGEMMINCOPYOBJ) $(XGEMMITCOPYOBJ) \ | |||
| $(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ) | |||
| ifeq ($(BUILD_HALF),1) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| SHBLASOBJS += $(SHKERNELOBJS) | |||
| endif | |||
| SBLASOBJS += $(SKERNELOBJS) | |||
| @@ -159,8 +159,8 @@ CBLASOBJS += $(CKERNELOBJS) | |||
| ZBLASOBJS += $(ZKERNELOBJS) | |||
| XBLASOBJS += $(XKERNELOBJS) | |||
| ifeq ($(BUILD_HALF),1) | |||
| SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| SHBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| @@ -492,11 +492,11 @@ ZBLASOBJS += \ | |||
| zgeadd_k$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_HALF), 1) | |||
| SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| SHGEMMITCOPYOBJ_P = $(SHGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| SHGEMMONCOPYOBJ_P = $(SHGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| SHGEMMOTCOPYOBJ_P = $(SHGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| ifeq ($(BUILD_BFLOAT16), 1) | |||
| SBGEMMINCOPYOBJ_P = $(SBGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| SBGEMMITCOPYOBJ_P = $(SBGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| SBGEMMONCOPYOBJ_P = $(SBGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| SBGEMMOTCOPYOBJ_P = $(SBGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| endif | |||
| SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| @@ -524,9 +524,9 @@ XGEMMITCOPYOBJ_P = $(XGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| ifeq ($(BUILD_HALF),1) | |||
| $(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) | |||
| $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| $(KDIR)sbgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA) | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| $(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) | |||
| @@ -548,35 +548,35 @@ $(KDIR)xgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMM_BETA) | |||
| $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@ | |||
| ifeq ($(BUILD_HALF), 1) | |||
| ifeq ($(BUILD_BFLOAT16), 1) | |||
| $(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY) | |||
| $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY) | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY) | |||
| $(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemmotcopy.s | |||
| m4 shgemmotcopy.s > shgemmotcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@ | |||
| rm shgemmotcopy.s shgemmotcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s | |||
| m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@ | |||
| rm sbgemmotcopy.s sbgemmotcopy_nomacros.s | |||
| else | |||
| $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) | |||
| ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) | |||
| $(KDIR)$(SHGEMMINCOPYOBJ) : $(KERNELDIR)/$(SHGEMMINCOPY) | |||
| $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY) | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(KDIR)$(SHGEMMITCOPYOBJ) : $(KERNELDIR)/$(SHGEMMITCOPY) | |||
| $(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemmitcopy.s | |||
| m4 shgemmitcopy.s > shgemmitcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@ | |||
| rm shgemmitcopy.s shgemmitcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s | |||
| m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@ | |||
| rm sbgemmitcopy.s sbgemmitcopy_nomacros.s | |||
| else | |||
| $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| endif | |||
| @@ -746,16 +746,16 @@ $(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL) | |||
| $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| ifeq ($(BUILD_HALF), 1) | |||
| ifeq ($(BUILD_BFLOAT16), 1) | |||
| $(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) | |||
| $(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND) | |||
| ifeq ($(OS), AIX) | |||
| $(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemm_kernel$(TSUFFIX).s | |||
| m4 shgemm_kernel$(TSUFFIX).s > shgemm_kernel$(TSUFFIX)_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@ | |||
| rm shgemm_kernel$(TSUFFIX).s shgemm_kernel$(TSUFFIX)_nomacros.s | |||
| $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s | |||
| m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@ | |||
| rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s | |||
| else | |||
| $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| endif | |||
| @@ -2375,9 +2375,9 @@ $(KDIR)xtrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_ | |||
| $(KDIR)sgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) | |||
| $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | |||
| ifeq ($(BUILD_HALF),1) | |||
| $(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) | |||
| $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| $(KDIR)sbgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA) | |||
| $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| $(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA) | |||
| @@ -2396,19 +2396,19 @@ $(KDIR)xgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM_BETA) | |||
| $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@ | |||
| ifeq ($(BUILD_HALF), 1) | |||
| $(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY) | |||
| $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| ifeq ($(BUILD_BFLOAT16), 1) | |||
| $(SBGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMONCOPY) | |||
| $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(SHGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMOTCOPY) | |||
| $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(SBGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMOTCOPY) | |||
| $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) | |||
| $(SHGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMINCOPY) | |||
| $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) | |||
| $(SBGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMINCOPY) | |||
| $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY) | |||
| $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| $(SBGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMITCOPY) | |||
| $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| endif | |||
| @@ -2518,9 +2518,9 @@ endif | |||
| endif | |||
| ifeq ($(BUILD_HALF), 1) | |||
| $(KDIR)shgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) | |||
| $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | |||
| ifeq ($(BUILD_BFLOAT16), 1) | |||
| $(KDIR)sbgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND) | |||
| $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||
| endif | |||
| $(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) | |||
| @@ -53,32 +53,32 @@ gotoblas_t TABLE_NAME = { | |||
| GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, | |||
| #ifdef BUILD_HALF | |||
| #ifdef BUILD_BFLOAT16 | |||
| 0, 0, 0, | |||
| SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N, | |||
| #ifdef SHGEMM_DEFAULT_UNROLL_MN | |||
| SHGEMM_DEFAULT_UNROLL_MN, | |||
| SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N, | |||
| #ifdef SBGEMM_DEFAULT_UNROLL_MN | |||
| SBGEMM_DEFAULT_UNROLL_MN, | |||
| #else | |||
| MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N), | |||
| MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N), | |||
| #endif | |||
| shstobf16_kTS, shdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS, | |||
| sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS, | |||
| samax_kTS, samin_kTS, smax_kTS, smin_kTS, | |||
| isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, | |||
| snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, shdot_kTS, | |||
| snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS, | |||
| dsdot_kTS, | |||
| srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, | |||
| sgemv_nTS, sgemv_tTS, sger_kTS, | |||
| ssymv_LTS, ssymv_UTS, | |||
| shgemm_kernelTS, shgemm_betaTS, | |||
| #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N | |||
| shgemm_incopyTS, shgemm_itcopyTS, | |||
| sbgemm_kernelTS, sbgemm_betaTS, | |||
| #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N | |||
| sbgemm_incopyTS, sbgemm_itcopyTS, | |||
| #else | |||
| shgemm_oncopyTS, shgemm_otcopyTS, | |||
| sbgemm_oncopyTS, sbgemm_otcopyTS, | |||
| #endif | |||
| shgemm_oncopyTS, shgemm_otcopyTS, | |||
| sbgemm_oncopyTS, sbgemm_otcopyTS, | |||
| strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS, | |||
| #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N | |||
| @@ -830,8 +830,8 @@ gotoblas_t TABLE_NAME = { | |||
| #if (ARCH_ARM64) | |||
| static void init_parameter(void) { | |||
| #if (BUILD_HALF) | |||
| TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | |||
| #if (BUILD_BFLOAT16) | |||
| TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| @@ -846,8 +846,8 @@ static void init_parameter(void) { | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #if (BUILD_HALF) | |||
| TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | |||
| #if (BUILD_BFLOAT16) | |||
| TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if BUILD_SINGLE == 1 | |||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
| @@ -862,8 +862,8 @@ static void init_parameter(void) { | |||
| TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if (BUILD_HALF) | |||
| TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | |||
| #if (BUILD_BFLOAT16) | |||
| TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; | |||
| #endif | |||
| #if BUILD_SINGLE == 1 | |||
| TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
| @@ -936,16 +936,16 @@ static void init_parameter(void) { | |||
| #if (ARCH_POWER) | |||
| static void init_parameter(void) { | |||
| #ifdef BUILD_HALF | |||
| TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | |||
| #ifdef BUILD_BFLOAT16 | |||
| TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | |||
| #endif | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #ifdef BUILD_HALF | |||
| TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | |||
| #ifdef BUILD_BFLOAT16 | |||
| TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; | |||
| #endif | |||
| TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
| TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
| @@ -953,8 +953,8 @@ static void init_parameter(void) { | |||
| TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
| #ifdef BUILD_HALF | |||
| TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | |||
| #ifdef BUILD_BFLOAT16 | |||
| TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | |||
| #endif | |||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
| TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
| @@ -965,16 +965,16 @@ static void init_parameter(void) { | |||
| #if (ARCH_ZARCH) | |||
| static void init_parameter(void) { | |||
| #ifdef BUILD_HALF | |||
| TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | |||
| #ifdef BUILD_BFLOAT16 | |||
| TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | |||
| #endif | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #ifdef BUILD_HALF | |||
| TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | |||
| #ifdef BUILD_BFLOAT16 | |||
| TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; | |||
| #endif | |||
| TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
| TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
| @@ -982,8 +982,8 @@ static void init_parameter(void) { | |||
| TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
| #ifdef BUILD_HALF | |||
| TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | |||
| #ifdef BUILD_BFLOAT16 | |||
| TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | |||
| #endif | |||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
| TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
| @@ -1124,10 +1124,10 @@ static void init_parameter(void) { | |||
| (void) l2; /* dirty trick to suppress unused variable warning for targets */ | |||
| /* where the GEMM unrolling parameters do not depend on l2 */ | |||
| #ifdef BUILD_HALF | |||
| TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | |||
| TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | |||
| TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | |||
| #ifdef BUILD_BFLOAT16 | |||
| TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | |||
| TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; | |||
| TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||