| @@ -104,6 +104,23 @@ matrix: | |||||
| # for matrix annotation only | # for matrix annotation only | ||||
| - TARGET_BOX=PPC64LE_LINUX_P9 | - TARGET_BOX=PPC64LE_LINUX_P9 | ||||
| - os: linux | |||||
| arch: ppc64le | |||||
| dist: bionic | |||||
| compiler: gcc | |||||
| before_script: | |||||
| - sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y | |||||
| - sudo apt-get update | |||||
| - sudo apt-get install gcc-9 gfortran-9 -y | |||||
| script: | |||||
| - make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 | |||||
| - make -C test $COMMON_FLAGS $BTYPE | |||||
| - make -C ctest $COMMON_FLAGS $BTYPE | |||||
| - make -C utest $COMMON_FLAGS $BTYPE | |||||
| env: | |||||
| # for matrix annotation only | |||||
| - TARGET_BOX=PPC64LE_LINUX_P9 | |||||
| - os: linux | - os: linux | ||||
| compiler: gcc | compiler: gcc | ||||
| addons: | addons: | ||||
| @@ -1,18 +1,18 @@ | |||||
| SHBLASOBJS_P = $(SHBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||||
| SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||||
| SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| SHEXTOBJS_P = $(SHEXTOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||||
| SBEXTOBJS_P = $(SBEXTOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||||
| COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX)) | COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX)) | HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX)) | ||||
| BLASOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS) | |||||
| BLASOBJS_P = $(SHEXTOBJS_P) $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P) | |||||
| BLASOBJS = $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS) | |||||
| BLASOBJS_P = $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P) | |||||
| ifdef EXPRECISION | ifdef EXPRECISION | ||||
| BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | ||||
| @@ -24,23 +24,23 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||||
| BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | ||||
| endif | endif | ||||
| $(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX | |||||
| $(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX | |||||
| $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX | $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX | ||||
| $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX | $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX | ||||
| $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX | $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX | ||||
| $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX | $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX | ||||
| $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX | $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX | ||||
| $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX | $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX | ||||
| $(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX | |||||
| $(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX | |||||
| $(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||||
| $(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||||
| $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(SHEXTOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||||
| $(SBEXTOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||||
| libs :: $(BLASOBJS) $(COMMONOBJS) | libs :: $(BLASOBJS) $(COMMONOBJS) | ||||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
| @@ -384,9 +384,9 @@ void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint | |||||
| /*** BFLOAT16 and INT8 extensions ***/ | /*** BFLOAT16 and INT8 extensions ***/ | ||||
| /* convert float array to BFLOAT16 array by rounding */ | /* convert float array to BFLOAT16 array by rounding */ | ||||
| void cblas_shstobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout); | |||||
| void cblas_sbstobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout); | |||||
| /* convert double array to BFLOAT16 array by rounding */ | /* convert double array to BFLOAT16 array by rounding */ | ||||
| void cblas_shdtobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout); | |||||
| void cblas_sbdtobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout); | |||||
| /* convert BFLOAT16 array to float array */ | /* convert BFLOAT16 array to float array */ | ||||
| void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, float *out, OPENBLAS_CONST blasint incout); | void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, float *out, OPENBLAS_CONST blasint incout); | ||||
| /* convert BFLOAT16 array to double array */ | /* convert BFLOAT16 array to double array */ | ||||
| @@ -257,7 +257,7 @@ typedef long BLASLONG; | |||||
| typedef unsigned long BLASULONG; | typedef unsigned long BLASULONG; | ||||
| #endif | #endif | ||||
| #ifndef BFLOAT16 | |||||
| #ifndef bfloat16 | |||||
| #include <stdint.h> | #include <stdint.h> | ||||
| typedef uint16_t bfloat16; | typedef uint16_t bfloat16; | ||||
| #define BFLOAT16CONVERSION 1 | #define BFLOAT16CONVERSION 1 | ||||
| @@ -55,8 +55,8 @@ double BLASFUNC(ddot) (blasint *, double *, blasint *, double *, blasint *); | |||||
| xdouble BLASFUNC(qdot) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | xdouble BLASFUNC(qdot) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | ||||
| float BLASFUNC(sbdot) (blasint *, bfloat16 *, blasint *, bfloat16 *, blasint *); | float BLASFUNC(sbdot) (blasint *, bfloat16 *, blasint *, bfloat16 *, blasint *); | ||||
| void BLASFUNC(shstobf16) (blasint *, float *, blasint *, bfloat16 *, blasint *); | |||||
| void BLASFUNC(shdtobf16) (blasint *, double *, blasint *, bfloat16 *, blasint *); | |||||
| void BLASFUNC(sbstobf16) (blasint *, float *, blasint *, bfloat16 *, blasint *); | |||||
| void BLASFUNC(sbdtobf16) (blasint *, double *, blasint *, bfloat16 *, blasint *); | |||||
| void BLASFUNC(sbf16tos) (blasint *, bfloat16 *, blasint *, float *, blasint *); | void BLASFUNC(sbf16tos) (blasint *, bfloat16 *, blasint *, float *, blasint *); | ||||
| void BLASFUNC(dbf16tod) (blasint *, bfloat16 *, blasint *, double *, blasint *); | void BLASFUNC(dbf16tod) (blasint *, bfloat16 *, blasint *, double *, blasint *); | ||||
| @@ -48,8 +48,8 @@ double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||||
| xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
| float sbdot_k(BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG); | float sbdot_k(BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG); | ||||
| void shstobf16_k(BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG); | |||||
| void shdtobf16_k(BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG); | |||||
| void sbstobf16_k(BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG); | |||||
| void sbdtobf16_k(BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG); | |||||
| void sbf16tos_k (BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG); | void sbf16tos_k (BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG); | ||||
| void dbf16tod_k (BLASLONG, bfloat16 *, BLASLONG, double *, BLASLONG); | void dbf16tod_k (BLASLONG, bfloat16 *, BLASLONG, double *, BLASLONG); | ||||
| @@ -646,9 +646,9 @@ | |||||
| #elif defined(BFLOAT16) | #elif defined(BFLOAT16) | ||||
| #define D_TO_BF16_K SHDTOBF16_K | |||||
| #define D_TO_BF16_K SBDTOBF16_K | |||||
| #define D_BF16_TO_K DBF16TOD_K | #define D_BF16_TO_K DBF16TOD_K | ||||
| #define S_TO_BF16_K SHSTOBF16_K | |||||
| #define S_TO_BF16_K SBSTOBF16_K | |||||
| #define S_BF16_TO_K SBF16TOS_K | #define S_BF16_TO_K SBF16TOS_K | ||||
| #define AMAX_K SAMAX_K | #define AMAX_K SAMAX_K | ||||
| @@ -20,7 +20,7 @@ USE_GEMM3M = 1 | |||||
| endif | endif | ||||
| ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
| SHBLASOBJS += sbgemm_nn.$(SUFFIX) sbgemm_nt.$(SUFFIX) sbgemm_tn.$(SUFFIX) sbgemm_tt.$(SUFFIX) | |||||
| SBBLASOBJS += sbgemm_nn.$(SUFFIX) sbgemm_nt.$(SUFFIX) sbgemm_tn.$(SUFFIX) sbgemm_tt.$(SUFFIX) | |||||
| endif | endif | ||||
| SBLASOBJS += \ | SBLASOBJS += \ | ||||
| @@ -208,7 +208,7 @@ COMMONOBJS += syrk_thread.$(SUFFIX) | |||||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | ifndef USE_SIMPLE_THREADED_LEVEL3 | ||||
| ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
| SHBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX) | |||||
| SBBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX) | |||||
| endif | endif | ||||
| SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) | SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) | ||||
| DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | ||||
| @@ -51,7 +51,7 @@ | |||||
| zgeadd, dzsum); | zgeadd, dzsum); | ||||
| @cblasobjs = (lsame, xerbla); | @cblasobjs = (lsame, xerbla); | ||||
| @halfblasobjs = (sbgemm, sbdot, shstobf16, shdtobf16, sbf16tos, dbf16tod); | |||||
| @halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod); | |||||
| @cblasobjsc = ( | @cblasobjsc = ( | ||||
| cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, | cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, | ||||
| cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k, | cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k, | ||||
| @@ -94,7 +94,7 @@ | |||||
| @cblasobjs = ( cblas_xerbla ); | @cblasobjs = ( cblas_xerbla ); | ||||
| @halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_shstobf16, cblas_shdtobf16, cblas_sbf16tos, cblas_dbf16tod); | |||||
| @halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod); | |||||
| @exblasobjs = ( | @exblasobjs = ( | ||||
| qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm, | qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm, | ||||
| @@ -283,9 +283,9 @@ CSBLAS3OBJS = \ | |||||
| cblas_sgeadd.$(SUFFIX) | cblas_sgeadd.$(SUFFIX) | ||||
| ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
| CBHBLAS1OBJS = cblas_sbdot.$(SUFFIX) | |||||
| CBHBLAS3OBJS = cblas_sbgemm.$(SUFFIX) | |||||
| CBHEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX) | |||||
| CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX) | |||||
| CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) | |||||
| CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX) | |||||
| endif | endif | ||||
| CDBLAS1OBJS = \ | CDBLAS1OBJS = \ | ||||
| @@ -535,19 +535,19 @@ endif | |||||
| clean :: | clean :: | ||||
| @rm -f functable.h | @rm -f functable.h | ||||
| level1 : $(BEXTOBJS) $(SHBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) | |||||
| level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) | |||||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
| level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | ||||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
| level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) | |||||
| level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) | |||||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
| aux : $(CBAUXOBJS) | aux : $(CBAUXOBJS) | ||||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
| $(CSHBLASOBJS) $(CSHBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ | |||||
| $(CSBBLASOBJS) $(CSBBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ | |||||
| $(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) $(CBAUXOBJS_P) : override CFLAGS += -DCBLAS | $(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) $(CBAUXOBJS_P) : override CFLAGS += -DCBLAS | ||||
| srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c | srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c | ||||
| @@ -775,9 +775,9 @@ dsdot.$(SUFFIX) dsdot.$(PSUFFIX) : dsdot.c | |||||
| ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
| sbdot.$(SUFFIX) sbdot.$(PSUFFIX) : bf16dot.c | sbdot.$(SUFFIX) sbdot.$(PSUFFIX) : bf16dot.c | ||||
| $(CC) $(CFLAGS) -c $< -o $(@F) | $(CC) $(CFLAGS) -c $< -o $(@F) | ||||
| shstobf16.$(SUFFIX) shstobf16.$(PSUFFIX) : tobf16.c | |||||
| sbstobf16.$(SUFFIX) sbstobf16.$(PSUFFIX) : tobf16.c | |||||
| $(CC) $(CFLAGS) -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) | $(CC) $(CFLAGS) -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) | ||||
| shdtobf16.$(SUFFIX) shdtobf16.$(PSUFFIX) : tobf16.c | |||||
| sbdtobf16.$(SUFFIX) sbdtobf16.$(PSUFFIX) : tobf16.c | |||||
| $(CC) $(CFLAGS) -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) | $(CC) $(CFLAGS) -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) | ||||
| sbf16tos.$(SUFFIX) sbf16tos.$(PSUFFIX) : bf16to.c | sbf16tos.$(SUFFIX) sbf16tos.$(PSUFFIX) : bf16to.c | ||||
| $(CC) $(CFLAGS) -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) | $(CC) $(CFLAGS) -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) | ||||
| @@ -1526,9 +1526,9 @@ cblas_dsdot.$(SUFFIX) cblas_dsdot.$(PSUFFIX) : dsdot.c | |||||
| ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
| cblas_sbdot.$(SUFFIX) cblas_sbdot.$(PSUFFIX) : bf16dot.c | cblas_sbdot.$(SUFFIX) cblas_sbdot.$(PSUFFIX) : bf16dot.c | ||||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | ||||
| cblas_shstobf16.$(SUFFIX) cblas_shstobf16.$(PSUFFIX) : tobf16.c | |||||
| cblas_sbstobf16.$(SUFFIX) cblas_sbstobf16.$(PSUFFIX) : tobf16.c | |||||
| $(CC) $(CFLAGS) -DCBLAS -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) | $(CC) $(CFLAGS) -DCBLAS -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) | ||||
| cblas_shdtobf16.$(SUFFIX) cblas_shdtobf16.$(PSUFFIX) : tobf16.c | |||||
| cblas_sbdtobf16.$(SUFFIX) cblas_sbdtobf16.$(PSUFFIX) : tobf16.c | |||||
| $(CC) $(CFLAGS) -DCBLAS -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) | $(CC) $(CFLAGS) -DCBLAS -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) | ||||
| cblas_sbf16tos.$(SUFFIX) cblas_sbf16tos.$(PSUFFIX) : bf16to.c | cblas_sbf16tos.$(SUFFIX) cblas_sbf16tos.$(PSUFFIX) : bf16to.c | ||||
| $(CC) $(CFLAGS) -DCBLAS -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) | $(CC) $(CFLAGS) -DCBLAS -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) | ||||
| @@ -531,11 +531,11 @@ XBLASOBJS += \ | |||||
| xscal_k$(TSUFFIX).$(SUFFIX) xswap_k$(TSUFFIX).$(SUFFIX) xsum_k$(TSUFFIX).$(SUFFIX) | xscal_k$(TSUFFIX).$(SUFFIX) xswap_k$(TSUFFIX).$(SUFFIX) xsum_k$(TSUFFIX).$(SUFFIX) | ||||
| ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
| SHBLASOBJS += \ | |||||
| SBBLASOBJS += \ | |||||
| sbdot_k$(TSUFFIX).$(SUFFIX) | sbdot_k$(TSUFFIX).$(SUFFIX) | ||||
| SHEXTOBJS += \ | |||||
| SBEXTOBJS += \ | |||||
| sbstobf16_k$(TSUFFIX).$(SUFFIX) sbdtobf16_k$(TSUFFIX).$(SUFFIX) | sbstobf16_k$(TSUFFIX).$(SUFFIX) sbdtobf16_k$(TSUFFIX).$(SUFFIX) | ||||
| SHEXTOBJS += \ | |||||
| SBEXTOBJS += \ | |||||
| sbf16tos_k$(TSUFFIX).$(SUFFIX) dbf16tod_k$(TSUFFIX).$(SUFFIX) | sbf16tos_k$(TSUFFIX).$(SUFFIX) dbf16tod_k$(TSUFFIX).$(SUFFIX) | ||||
| endif | endif | ||||
| @@ -94,7 +94,7 @@ SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX) | SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||
| endif | endif | ||||
| SHKERNELOBJS += \ | |||||
| SBKERNELOBJS += \ | |||||
| sbgemm_kernel$(TSUFFIX).$(SUFFIX) \ | sbgemm_kernel$(TSUFFIX).$(SUFFIX) \ | ||||
| $(SBGEMMINCOPYOBJ) $(SBGEMMITCOPYOBJ) \ | $(SBGEMMINCOPYOBJ) $(SBGEMMITCOPYOBJ) \ | ||||
| $(SBGEMMONCOPYOBJ) $(SBGEMMOTCOPYOBJ) | $(SBGEMMONCOPYOBJ) $(SBGEMMOTCOPYOBJ) | ||||
| @@ -150,7 +150,7 @@ XKERNELOBJS += \ | |||||
| $(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ) | $(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ) | ||||
| ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
| SHBLASOBJS += $(SHKERNELOBJS) | |||||
| SBBLASOBJS += $(SBKERNELOBJS) | |||||
| endif | endif | ||||
| SBLASOBJS += $(SKERNELOBJS) | SBLASOBJS += $(SKERNELOBJS) | ||||
| DBLASOBJS += $(DKERNELOBJS) | DBLASOBJS += $(DKERNELOBJS) | ||||
| @@ -160,7 +160,7 @@ ZBLASOBJS += $(ZKERNELOBJS) | |||||
| XBLASOBJS += $(XKERNELOBJS) | XBLASOBJS += $(XKERNELOBJS) | ||||
| ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
| SHBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX) | |||||
| SBBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | ||||
| @@ -214,11 +214,9 @@ endif | |||||
| #ifeq ($(BUILD_BFLOAT16),1) | |||||
| #level3 : test_sbgemm sblat3 dblat3 cblat3 zblat3 | |||||
| #else | |||||
| #level3 : sblat3 dblat3 cblat3 zblat3 | |||||
| #endif | |||||
| ifeq ($(BUILD_BFLOAT16),1) | |||||
| level3 : test_sbgemm | |||||
| endif | |||||
| ifndef CROSS | ifndef CROSS | ||||
| rm -f ?BLAT3.SUMM | rm -f ?BLAT3.SUMM | ||||