From ef6374196d0fbb69d0720c973abad9ef39a89253 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Wed, 20 Aug 2014 09:00:56 +0200 Subject: [PATCH] updated optimized dsymv_U kernel for bulldozer --- kernel/x86_64/dsymv_U.c | 133 +++++++++++++++------ kernel/x86_64/dsymv_U_microk_bulldozer-2.c | 125 ++++++++++--------- 2 files changed, 168 insertions(+), 90 deletions(-) diff --git a/kernel/x86_64/dsymv_U.c b/kernel/x86_64/dsymv_U.c index 1f22abe8d..267755c2f 100644 --- a/kernel/x86_64/dsymv_U.c +++ b/kernel/x86_64/dsymv_U.c @@ -28,43 +28,97 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" + #if defined(BULLDOZER) #include "dsymv_U_microk_bulldozer-2.c" +#elif defined(NEHALEM) +#include "dsymv_U_microk_nehalem-2.c" #endif +#ifndef HAVE_KERNEL_4x4 + +static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT *a3, FLOAT *xp, FLOAT *yp, FLOAT *temp1, FLOAT *temp2) +{ + FLOAT at0,at1,at2,at3; + FLOAT x; + FLOAT tmp2[4] = { 0.0, 0.0, 0.0, 0.0 }; + FLOAT tp0; + FLOAT tp1; + FLOAT tp2; + FLOAT tp3; + BLASLONG i; -#ifndef HAVE_KERNEL_8x2 + tp0 = temp1[0]; + tp1 = temp1[1]; + tp2 = temp1[2]; + tp3 = temp1[3]; + + for (i=0; i