From 4ba4ab623f045c54d68bd55519b55a3ac5596b74 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Tue, 19 Aug 2014 17:09:45 +0200 Subject: [PATCH] added optimized ssymv_U kernel for nehalem --- kernel/x86_64/KERNEL.NEHALEM | 3 + kernel/x86_64/ssymv_U.c | 132 +++++++++++++++++------ kernel/x86_64/ssymv_U_microk_nehalem-2.c | 130 ++++++++++++++++++++++ 3 files changed, 231 insertions(+), 34 deletions(-) create mode 100644 kernel/x86_64/ssymv_U_microk_nehalem-2.c diff --git a/kernel/x86_64/KERNEL.NEHALEM b/kernel/x86_64/KERNEL.NEHALEM index ca9ff252d..353514449 100644 --- a/kernel/x86_64/KERNEL.NEHALEM +++ b/kernel/x86_64/KERNEL.NEHALEM @@ -1,3 +1,6 @@ +#DSYMV_U_KERNEL = dsymv_U.c +SSYMV_U_KERNEL = ssymv_U.c + SGEMVNKERNEL = sgemv_n.c SGEMVTKERNEL = sgemv_t.c diff --git a/kernel/x86_64/ssymv_U.c b/kernel/x86_64/ssymv_U.c index 75b8e2c3e..61127aa3d 100644 --- a/kernel/x86_64/ssymv_U.c +++ b/kernel/x86_64/ssymv_U.c @@ -31,41 +31,94 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(BULLDOZER) #include "ssymv_U_microk_bulldozer-2.c" +#elif defined(NEHALEM) +#include "ssymv_U_microk_nehalem-2.c" #endif +#ifndef HAVE_KERNEL_4x4 -#ifndef HAVE_KERNEL_16x2 +static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT *a3, FLOAT *xp, FLOAT *yp, FLOAT *temp1, FLOAT *temp2) +{ + FLOAT at0,at1,at2,at3; + FLOAT x; + FLOAT tmp2[4] = { 0.0, 0.0, 0.0, 0.0 }; + FLOAT tp0; + FLOAT tp1; + FLOAT tp2; + FLOAT tp3; + BLASLONG i; + + tp0 = temp1[0]; + tp1 = temp1[1]; + tp2 = temp1[2]; + tp3 = temp1[3]; + + for (i=0; i