Browse Source

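Small optimizations for the zgemv kernels: add a unit-stride fast path (inc_x == 1 && inc_y == 1) and advance the row index i2 incrementally instead of recomputing 2*i on every iteration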

tags/v0.2.9.rc1
wernsaar 12 years ago
parent
commit
33d3ab6e09
2 changed files with 78 additions and 37 deletions
  1. kernel/arm/zgemv_n.c (+47, -15)
  2. kernel/arm/zgemv_t.c (+31, -22)

kernel/arm/zgemv_n.c (+47, -15) View File

@@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

/**************************************************************************************
* * 2013/09/15 Saar
* * 2013/11/23 Saar
* * BLASTEST float : OK
* * BLASTEST double : OK
* CTEST : OK
@@ -48,20 +48,17 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
BLASLONG lda2;
BLASLONG i2;

if( alpha_r == 0.0 && alpha_i == 0.0 ) return(0);

lda2 = 2*lda;

inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;

ix = 0;
a_ptr = a;

#if !defined(CONJ)
for (j=0; j<n; j++)
if ( inc_x == 1 && inc_y == 1 )
{

for (j=0; j<n; j++)
{

#if !defined(XCONJ)
temp_r = alpha_r * x[ix] - alpha_i * x[ix+1];
temp_i = alpha_r * x[ix+1] + alpha_i * x[ix];
@@ -70,9 +67,12 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
temp_i = alpha_r * x[ix+1] - alpha_i * x[ix];
#endif
iy = 0;
i2=0;

for (i=0; i<m; i++)
{
i2 = 2*i;
#if !defined(CONJ)

#if !defined(XCONJ)
y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1];
y[iy+1] += temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2];
@@ -81,13 +81,32 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
y[iy+1] += temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2];
#endif

iy += inc_y2;
#else

#if !defined(XCONJ)
y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1];
y[iy+1] -= temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2];
#else
y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1];
y[iy+1] -= temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2];
#endif

#endif
i2 += 2;
iy += 2;
}
a_ptr += lda2;
ix += inc_x2;
ix += 2;
}

return(0);

}

inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;

#else
for (j=0; j<n; j++)
{

@@ -99,9 +118,22 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
temp_i = alpha_r * x[ix+1] - alpha_i * x[ix];
#endif
iy = 0;
i2=0;

for (i=0; i<m; i++)
{
i2 = 2*i;
#if !defined(CONJ)

#if !defined(XCONJ)
y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1];
y[iy+1] += temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2];
#else
y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1];
y[iy+1] += temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2];
#endif

#else

#if !defined(XCONJ)
y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1];
y[iy+1] -= temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2];
@@ -110,6 +142,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
y[iy+1] -= temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2];
#endif

#endif
i2 += 2;
iy += inc_y2;
}
a_ptr += lda2;
@@ -117,8 +151,6 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
}


#endif

return(0);
}


kernel/arm/zgemv_t.c (+31, -22) View File

@@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

/**************************************************************************************
* * 2013/09/15 Saar
* * 2013/11/23 Saar
* * BLASTEST float : OK
* * BLASTEST double : OK
* CTEST : OK
@@ -48,33 +48,34 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
BLASLONG lda2;
BLASLONG i2;

if( alpha_r == 0.0 && alpha_i == 0.0 ) return(0);
lda2 = 2*lda;

inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;

iy = 0;
a_ptr = a;

#if !defined(CONJ)
for (j=0; j<n; j++)
if ( inc_x == 1 && inc_y == 1 )
{

for (j=0; j<n; j++)
{
temp_r = 0.0;
temp_i = 0.0;
ix = 0;
i2=0;

for (i=0; i<m; i++)
{
i2 = 2*i;
#if !defined(XCONJ)
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
temp_r += a_ptr[i2] * x[ix] - a_ptr[i2+1] * x[ix+1];
temp_i += a_ptr[i2] * x[ix+1] + a_ptr[i2+1] * x[ix];
#else
temp_r += a_ptr[i2] * x[ix] + a_ptr[i2+1] * x[ix+1];
temp_i += a_ptr[i2] * x[ix+1] - a_ptr[i2+1] * x[ix];
#endif
ix += inc_x2;

i2 += 2;
ix += 2;
}

#if !defined(XCONJ)
@@ -86,46 +87,54 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
#endif

a_ptr += lda2;
iy += inc_y2;
iy += 2;
}

return(0);

}

#else

inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y;

for (j=0; j<n; j++)
{
temp_r = 0.0;
temp_i = 0.0;
ix = 0;
i2=0;

for (i=0; i<m; i++)
{
i2 = 2*i;
#if !defined(XCONJ)
temp_r += a_ptr[i2] * x[ix] + a_ptr[i2+1] * x[ix+1];
temp_i += a_ptr[i2] * x[ix+1] - a_ptr[i2+1] * x[ix];
#else

#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
temp_r += a_ptr[i2] * x[ix] - a_ptr[i2+1] * x[ix+1];
temp_i += a_ptr[i2] * x[ix+1] + a_ptr[i2+1] * x[ix];
#else
temp_r += a_ptr[i2] * x[ix] + a_ptr[i2+1] * x[ix+1];
temp_i += a_ptr[i2] * x[ix+1] - a_ptr[i2+1] * x[ix];
#endif

i2 += 2;
ix += inc_x2;
}


#if !defined(XCONJ)
y[iy] += alpha_r * temp_r - alpha_i * temp_i;
y[iy+1] += alpha_r * temp_i + alpha_i * temp_r;
#else
y[iy] += alpha_r * temp_r + alpha_i * temp_i;
y[iy+1] -= alpha_r * temp_i - alpha_i * temp_r;
y[iy] += alpha_r * temp_r + alpha_i * temp_i;
y[iy+1] -= alpha_r * temp_i - alpha_i * temp_r;
#endif

a_ptr += lda2;
iy += inc_y2;
}
#endif

return(0);

}



Loading…
Cancel
Save