Browse Source

Create an AVX512-enabled version of DGEMM

This patch adds dgemm_kernel_4x8_skylakex.c, which is:
* dgemm_kernel_4x8_haswell.s converted to C + intrinsics
* 8x8 support added
* 8x8 kernel implemented using AVX512

Performance tuning is still a work in progress, but the kernel already
shows a 10%-20% increase for a wide range of matrix sizes.
tags/v0.3.4
Arjan van de Ven 7 years ago
parent
commit
45fe8cb0c5
2 changed files with 1293 additions and 11 deletions
  1. +5
    -11
      kernel/x86_64/KERNEL.SKYLAKEX
  2. +1288
    -0
      kernel/x86_64/dgemm_kernel_4x8_skylakex.c

+ 5
- 11
kernel/x86_64/KERNEL.SKYLAKEX View File

@@ -2,18 +2,12 @@ include $(KERNELDIR)/KERNEL.HASWELL

SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S

DGEMMKERNEL = dgemm_kernel_4x8_skylakex.c

#DTRMMKERNEL = ../generic/trmmkernel_16x2.c
#DGEMMKERNEL = dgemm_kernel_16x2_skylakex.S
#DGEMMINCOPY = ../generic/gemm_ncopy_16.c
#DGEMMITCOPY = ../generic/gemm_tcopy_16.c
#DGEMMONCOPY = ../generic/gemm_ncopy_2.c
#DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
#DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
#DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
#DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
#DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

DGEMMINCOPY = ../generic/gemm_ncopy_8.c
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c

SGEMM_BETA = ../generic/gemm_beta.c
DGEMM_BETA = ../generic/gemm_beta.c

+ 1288
- 0
kernel/x86_64/dgemm_kernel_4x8_skylakex.c
File diff suppressed because it is too large
View File


Loading…
Cancel
Save