Browse Source

Optimized saxpy.c for increments != 1 (strided loop unrolled four times)

tags/v0.2.15^2
Werner Saar 10 years ago
parent
commit
dee100d0e4
1 changed file with 25 additions and 2 deletions
  1. +25
    -2
      kernel/x86_64/saxpy.c

+ 25
- 2
kernel/x86_64/saxpy.c View File

@@ -76,9 +76,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
{ {


#if defined(SANDYBRIDGE) #if defined(SANDYBRIDGE)
int n1 = n & -64;
BLASLONG n1 = n & -64;
#else #else
int n1 = n & -32;
BLASLONG n1 = n & -32;
#endif #endif


if ( n1 ) if ( n1 )
@@ -97,6 +97,29 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS


} }


BLASLONG n1 = n & -4;

while(i < n1)
{

FLOAT m1 = da * x[ix] ;
FLOAT m2 = da * x[ix+inc_x] ;
FLOAT m3 = da * x[ix+2*inc_x] ;
FLOAT m4 = da * x[ix+3*inc_x] ;

y[iy] += m1 ;
y[iy+inc_y] += m2 ;
y[iy+2*inc_y] += m3 ;
y[iy+3*inc_y] += m4 ;

ix += inc_x*4 ;
iy += inc_y*4 ;
i+=4 ;


}


while(i < n) while(i < n)
{ {




Loading…
Cancel
Save