Browse Source

bugfix for sgemv_n_4.c

tags/v0.2.12^2
wernsaar 11 years ago
parent
commit
53de943690
2 changed files with 13 additions and 30 deletions
  1. +1
    -1
      kernel/x86_64/KERNEL.BULLDOZER
  2. +12
    -29
      kernel/x86_64/sgemv_n_4.c

+ 1
- 1
kernel/x86_64/KERNEL.BULLDOZER View File

@@ -10,7 +10,7 @@ DSYMV_L_KERNEL = dsymv_L.c
SSYMV_U_KERNEL = ssymv_U.c
SSYMV_L_KERNEL = ssymv_L.c

-SGEMVNKERNEL = sgemv_n.c
+SGEMVNKERNEL = sgemv_n_4.c
SGEMVTKERNEL = sgemv_t_4.c

ZGEMVNKERNEL = zgemv_n_dup.S


+ 12
- 29
kernel/x86_64/sgemv_n_4.c View File

@@ -185,8 +185,17 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO

ybuffer = buffer;
-n1 = n >> 3 ;
-n2 = n & 7 ;
+if ( inc_x == 1 )
+{
+n1 = n >> 3 ;
+n2 = n & 7 ;
+}
+else
+{
+n1 = n >> 2 ;
+n2 = n & 3 ;
+
+}
m3 = m & 3 ;
m1 = m & -4 ;
@@ -258,32 +267,6 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
{

for( i = 0; i < n1 ; i++)
-{
-xbuffer[0] = x_ptr[0];
-x_ptr += inc_x;
-xbuffer[1] = x_ptr[0];
-x_ptr += inc_x;
-xbuffer[2] = x_ptr[0];
-x_ptr += inc_x;
-xbuffer[3] = x_ptr[0];
-x_ptr += inc_x;
-xbuffer[4] = x_ptr[0];
-x_ptr += inc_x;
-xbuffer[5] = x_ptr[0];
-x_ptr += inc_x;
-xbuffer[6] = x_ptr[0];
-x_ptr += inc_x;
-xbuffer[7] = x_ptr[0];
-x_ptr += inc_x;
-sgemv_kernel_4x8(NB,ap,x_ptr,ybuffer,lda4);
-ap[0] += lda8;
-ap[1] += lda8;
-ap[2] += lda8;
-ap[3] += lda8;
-a_ptr += lda8;
-}
-
-if ( n2 & 4 )
{
xbuffer[0] = x_ptr[0];
x_ptr += inc_x;
@@ -301,7 +284,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
a_ptr += lda4;
}

-for( i = 0; i < ( n2 & 3) ; i++)
+for( i = 0; i < n2 ; i++)
{
xbuffer[0] = x_ptr[0];
x_ptr += inc_x;


Loading…
Cancel
Save