| @@ -9,6 +9,10 @@ ifeq ($(ARCH), x86_64) | |||||
| USE_GEMM3M = 1 | USE_GEMM3M = 1 | ||||
| endif | endif | ||||
# Turn on the direct SGEMM build switch for x86_64 targets.
# Other sections of this file test the switch with `ifdef USE_DIRECT_SGEMM`.
ifeq "$(ARCH)" "x86_64"
  USE_DIRECT_SGEMM = 1
endif
# Turn on the USE_GEMM3M build switch when targeting ia64.
ifeq "$(ARCH)" "ia64"
  USE_GEMM3M = 1
endif
| @@ -65,6 +69,13 @@ ifeq ($(CORE), Z14) | |||||
| USE_TRMM = 1 | USE_TRMM = 1 | ||||
| endif | endif | ||||
# Default source files for the direct SGEMM objects. These names are used as
# prerequisites ($(KERNELDIR)/<name>) by the sgemm_direct and
# sgemm_direct_performant object rules.
#
# Each variable has its own guard so that it can be overridden independently:
# with a single shared guard, overriding only SGEMMDIRECTKERNEL would leave
# SGEMMDIRECTPERFORMANT empty (making the rule depend on the bare
# $(KERNELDIR)/ directory), and overriding only SGEMMDIRECTPERFORMANT would
# be silently replaced by the default.
ifdef USE_DIRECT_SGEMM
ifndef SGEMMDIRECTKERNEL
SGEMMDIRECTKERNEL = sgemm_direct_skylakex.c
endif
ifndef SGEMMDIRECTPERFORMANT
SGEMMDIRECTPERFORMANT = sgemm_direct_performant.c
endif
endif
| ifeq ($(BUILD_HALF), 1) | ifeq ($(BUILD_HALF), 1) | ||||
| ifndef SHGEMMKERNEL | ifndef SHGEMMKERNEL | ||||
| SHGEMM_BETA = ../generic/gemm_beta.c | SHGEMM_BETA = ../generic/gemm_beta.c | ||||
| @@ -90,6 +101,12 @@ SKERNELOBJS += \ | |||||
| $(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \ | $(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \ | ||||
| $(SGEMMONCOPYOBJ) $(SGEMMOTCOPYOBJ) | $(SGEMMONCOPYOBJ) $(SGEMMOTCOPYOBJ) | ||||
# When the direct SGEMM switch is set, add its two objects to the
# single-precision kernel object list.
ifdef USE_DIRECT_SGEMM
SKERNELOBJS += sgemm_direct$(TSUFFIX).$(SUFFIX)
SKERNELOBJS += sgemm_direct_performant$(TSUFFIX).$(SUFFIX)
endif
| DKERNELOBJS += \ | DKERNELOBJS += \ | ||||
| dgemm_kernel$(TSUFFIX).$(SUFFIX) \ | dgemm_kernel$(TSUFFIX).$(SUFFIX) \ | ||||
| $(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \ | $(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \ | ||||
| @@ -668,6 +685,13 @@ else | |||||
| $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | ||||
| endif | endif | ||||
# Build rules for the two direct SGEMM objects. Each compiles a single source
# file from $(KERNELDIR) with DOUBLE and COMPLEX explicitly undefined.
ifdef USE_DIRECT_SGEMM

# Object built from the source named by SGEMMDIRECTKERNEL.
$(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL)
	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@

# Object built from the source named by SGEMMDIRECTPERFORMANT.
$(KDIR)sgemm_direct_performant$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTPERFORMANT)
	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@

endif
| ifeq ($(BUILD_HALF), 1) | ifeq ($(BUILD_HALF), 1) | ||||
| $(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) | $(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) | ||||