Browse Source

style: modify HALF to BFLOAT16 in benchmark folder

pull/5290/head
Srangrang 10 months ago
parent
commit
9f13b2c6ac
4 changed files with 43 additions and 31 deletions
  1. +21
    -5
      benchmark/Makefile
  2. +6
    -1
      benchmark/gemm.c
  3. +16
    -16
      driver/level3/Makefile
  4. +0
    -9
      kernel/x86_64/KERNEL.HASWELL

+ 21
- 5
benchmark/Makefile View File

@@ -56,9 +56,15 @@ GOTO_LAPACK_TARGETS=
endif

ifeq ($(BUILD_BFLOAT16),1)
GOTO_HALF_TARGETS=sbgemm.goto
GOTO_BFLOAT_TARGETS=sbgemm.goto
else
GOTO_HALF_TARGETS=
GOTO_BFLOAT_TARGETS=
endif

ifeq ($(BUILD_HFLOAT16),1)
GOTO_HFLOAT_TARGETS=shgemm.goto
else
GOTO_HFLOAT_TARGETS=
endif

ifeq ($(OSNAME), WINNT)
@@ -104,7 +110,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
somatcopy.goto domatcopy.goto comatcopy.goto zomatcopy.goto \
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto $(GOTO_HALF_TARGETS)
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto $(GOTO_BFLOAT_TARGETS) $(GOTO_HFLOAT_TARGETS)

acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
@@ -278,7 +284,7 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
smin.goto dmin.goto \
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto \
somatcopy.goto domatcopy.goto comatcopy.goto zomatcopy.goto \
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS) $(GOTO_HALF_TARGETS)
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS) $(GOTO_BFLOAT_TARGETS) $(GOTO_HFLOAT_TARGETS)

acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
@@ -633,6 +639,11 @@ sbgemm.goto : sbgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
endif

ifeq ($(BUILD_HFLOAT16),1)
shgemm.goto : shgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
endif

sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm

@@ -2960,7 +2971,12 @@ zcholesky.$(SUFFIX) : cholesky.c

ifeq ($(BUILD_BFLOAT16),1)
sbgemm.$(SUFFIX) : gemm.c
$(CC) $(CFLAGS) -c -DHALF -UCOMPLEX -UDOUBLE -o $(@F) $^
$(CC) $(CFLAGS) -c -DBFLOAT16 -UCOMPLEX -UDOUBLE -o $(@F) $^
endif

ifeq ($(BUILD_HFLOAT16),1)
shgemm.$(SUFFIX) : gemm.c
$(CC) $(CFLAGS) -c -DHFLOAT16 -UCOMPLEX -UDOUBLE -o $(@F) $^
endif

sgemm.$(SUFFIX) : gemm.c


+ 6
- 1
benchmark/gemm.c View File

@@ -33,12 +33,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifdef DOUBLE
#define GEMM BLASFUNC(dgemm)
#elif defined(HALF)
#elif defined(BFLOAT16)
#define GEMM BLASFUNC(sbgemm)
#undef IFLOAT
#define IFLOAT bfloat16
#elif defined(HFLOAT16)
#define GEMM BLASFUNC(shgemm)
#undef IFLOAT
#define IFLOAT hfloat16
#else
#define GEMM BLASFUNC(sgemm)
#define IFLOAT float
#endif

#else


+ 16
- 16
driver/level3/Makefile View File

@@ -351,16 +351,16 @@ endif
all ::

sbgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h
$(CC) $(CFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
$(CC) $(CFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)

sbgemm_nt.$(SUFFIX) : gemm.c level3.c ../../param.h
$(CC) $(CFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
$(CC) $(CFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)

sbgemm_tn.$(SUFFIX) : gemm.c level3.c ../../param.h
$(CC) $(CFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
$(CC) $(CFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)

sbgemm_tt.$(SUFFIX) : gemm.c level3.c ../../param.h
$(CC) $(CFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
$(CC) $(CFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)

shgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h
$(CC) $(CFLAGS) $(BLOCKS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
@@ -570,16 +570,16 @@ beta_thread.$(SUFFIX) : beta_thread.c ../../common.h
$(CC) -c $(CFLAGS) $< -o $(@F)

sbgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)

sbgemm_thread_nt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)

sbgemm_thread_tn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)

sbgemm_thread_tt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)

shgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
@@ -2767,16 +2767,16 @@ xtrsm_RCLN.$(SUFFIX) : trsm_R.c
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA -UUPPER -UUNIT -DCONJ $< -o $(@F)

sbgemm_nn.$(PSUFFIX) : gemm.c level3.c ../../param.h
$(CC) $(PFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
$(CC) $(PFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)

sbgemm_nt.$(PSUFFIX) : gemm.c level3.c ../../param.h
$(CC) $(PFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
$(CC) $(PFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)

sbgemm_tn.$(PSUFFIX) : gemm.c level3.c ../../param.h
$(CC) $(PFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
$(CC) $(PFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)

sbgemm_tt.$(PSUFFIX) : gemm.c level3.c ../../param.h
$(CC) $(PFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
$(CC) $(PFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)

shgemm_nn.$(PSUFFIX) : gemm.c level3.c ../../param.h
$(CC) $(PFLAGS) $(BLOCKS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
@@ -3002,16 +3002,16 @@ zgemm_batch_thread.$(SUFFIX) : gemm_batch_thread.c ../../common.h


sbgemm_thread_nn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)

sbgemm_thread_nt.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)

sbgemm_thread_tn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)

sbgemm_thread_tt.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)

shgemm_thread_nn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)


+ 0
- 9
kernel/x86_64/KERNEL.HASWELL View File

@@ -106,12 +106,3 @@ DASUMKERNEL = dasum.c

SROTKERNEL = srot.c
DROTKERNEL = drot.c


ifeq ($(BUILD_BFLOAT16), 1)
SHGEMMKERNEL = ../generic/gemmkernel_2x2.c
SHGEMMONCOPY = ../generic/gemm_ncopy_2.c
SHGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif

Loading…
Cancel
Save