Browse Source

support extra flag for NaN handling

tags/v0.3.30
Martin Kroeker GitHub 8 months ago
parent
commit
669c847ceb
No known key found for this signature in database. GPG Key ID: B5690EEEBB952194
2 changed files with 101 additions and 41 deletions
  1. kernel/x86_64/cscal.c (+52, -22)
  2. kernel/x86_64/zscal.c (+49, -19)

+ 52
- 22
kernel/x86_64/cscal.c View File

@@ -229,10 +229,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,


if ( da_i == 0.0 ) if ( da_i == 0.0 )
{ {
if (!dummy2) {
while(j < n1) while(j < n1)
{ {
x[i]=0.0; x[i]=0.0;
x[i+1]=0.0; x[i+1]=0.0;
x[i+inc_x]=0.0; x[i+inc_x]=0.0;
@@ -244,21 +243,48 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,


while(j < n) while(j < n)
{ {
x[i]=0.0; x[i]=0.0;
x[i+1]=0.0; x[i+1]=0.0;
i += inc_x ; i += inc_x ;
j++; j++;

} }
} else {
float temp;
while(j < n1)
{
if (isnan(x[i])|| isnan(x[i+1]))
temp=NAN;
else
temp=0.0;
x[i]=temp;
x[i+1]=temp;
if (isnan(x[i+inc_x])|| isnan(x[i+inc_x+1]))
temp=NAN;
else
temp=0.0;
x[i+inc_x]= temp;
x[i+inc_x+1]= temp;
i += 2*inc_x;
j+=2;


}
while(j < n)
{
if (isnan(x[i])|| isnan(x[i+1]))
temp=NAN;
else
temp=0.0;
x[i]=temp;
x[i+1]=temp;
i += inc_x;
j++;
}
}
} }
else else
{ {

while(j < n1) while(j < n1)
{ {
if (isnan(x[i]) || isinf(x[i])) if (isnan(x[i]) || isinf(x[i]))
temp0 = NAN; temp0 = NAN;
else else
@@ -278,7 +304,6 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
x[i+inc_x] = temp1; x[i+inc_x] = temp1;
i += 2*inc_x ; i += 2*inc_x ;
j+=2; j+=2;

} }


while(j < n) while(j < n)
@@ -305,14 +330,12 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
else else
{ {



if ( da_i == 0.0 )
if ( da_i == 0.0 && dummy2 )
{ {
BLASLONG n1 = n & -2; BLASLONG n1 = n & -2;


while(j < n1) while(j < n1)
{ {
temp0 = da_r * x[i]; temp0 = da_r * x[i];
x[i+1] = da_r * x[i+1]; x[i+1] = da_r * x[i+1];
x[i] = temp0; x[i] = temp0;
@@ -367,22 +390,19 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
return(0); return(0);
} }



BLASLONG n1 = n & -16; BLASLONG n1 = n & -16;
if ( n1 > 0 ) if ( n1 > 0 )
{ {


alpha[0] = da_r; alpha[0] = da_r;
alpha[1] = da_i; alpha[1] = da_i;
if ( da_r == 0.0 ) if ( da_r == 0.0 )
if ( da_i == 0 )
if ( da_i == 0 && !dummy2)
cscal_kernel_16_zero(n1 , alpha , x); cscal_kernel_16_zero(n1 , alpha , x);
else else
cscal_kernel_16_zero_r(n1 , alpha , x);
cscal_kernel_16/*_zero_r*/(n1 , alpha , x);
else else
cscal_kernel_16(n1 , alpha , x); cscal_kernel_16(n1 , alpha , x);

i = n1 << 1; i = n1 << 1;
j = n1; j = n1;
} }
@@ -393,6 +413,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
{ {
FLOAT res=0.0; FLOAT res=0.0;
if (isnan(da_r)) res= da_r; if (isnan(da_r)) res= da_r;
if (dummy2)
if (isnan(x[i])||isnan(x[i+1])) res= NAN;
while(j < n) while(j < n)
{ {
x[i]=res; x[i]=res;
@@ -415,7 +437,6 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,


} else } else
{ {

while(j < n) while(j < n)
{ {
temp0 = -da_i * x[i+1]; temp0 = -da_i * x[i+1];
@@ -424,11 +445,10 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
if (!isinf(x[i+1])) if (!isinf(x[i+1]))
x[i+1] = da_i * x[i]; x[i+1] = da_i * x[i];
else x[i+1] = NAN; else x[i+1] = NAN;
if ( x[i] == x[i]) //preserve NaN
if ( !isnan(x[i])) //preserve NaN
x[i] = temp0; x[i] = temp0;
i += 2 ; i += 2 ;
j++; j++;

} }


} }
@@ -439,12 +459,22 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,


if ( da_i == 0.0 ) if ( da_i == 0.0 )
{ {

while(j < n) while(j < n)
{ {
temp0 = da_r * x[i]; temp0 = da_r * x[i];
x[i+1] = da_r * x[i+1];
if (dummy2) {
if (isnan(x[i])||isinf(x[i])) temp0=NAN;
if (isnan(x[i+1])||isinf(x[i+1]))
x[i+1]=NAN;
else
x[i+1] = da_r * x[i+1];
} else {
if (isnan(x[i]))
x[i+1] = NAN;
else
x[i+1] = da_r * x[i+1];
}
x[i] = temp0; x[i] = temp0;
i += 2 ; i += 2 ;
j++; j++;
@@ -476,7 +506,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,


temp0 = da_r * x[i] - da_i * x[i+1]; temp0 = da_r * x[i] - da_i * x[i+1];
x[i+1] = da_r * x[i+1] + da_i * x[i]; x[i+1] = da_r * x[i+1] + da_i * x[i];
x[i] = temp0;
if(!isnan(x[i]))x[i] = temp0;
i += 2 ; i += 2 ;
j++; j++;




+ 49
- 19
kernel/x86_64/zscal.c View File

@@ -222,13 +222,14 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,


if ( da_r == 0.0 ) if ( da_r == 0.0 )
{ {

BLASLONG n1 = n & -2; BLASLONG n1 = n & -2;


if ( da_i == 0.0 ) if ( da_i == 0.0 )
{ {
if (!dummy2) {
while(j < n1) while(j < n1)
{ {

x[i]=0.0; x[i]=0.0;
x[i+1]=0.0; x[i+1]=0.0;
x[i+inc_x]=0.0; x[i+inc_x]=0.0;
@@ -245,9 +246,40 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
x[i+1]=0.0; x[i+1]=0.0;
i += inc_x ; i += inc_x ;
j++; j++;
}
} else {
float temp;
while(j < n1)
{
if (isnan(x[i])|| isnan(x[i+1]))
temp=NAN;
else
temp=0.0;
x[i]=temp;
x[i+1]=temp;
if (isnan(x[i+inc_x])|| isnan(x[i+inc_x+1]))
temp=NAN;
else
temp=0.0;
x[i+inc_x]= temp;
x[i+inc_x+1]= temp;
i += 2*inc_x;
j+=2;


} }
while(j < n)
{
if (isnan(x[i])|| isnan(x[i+1]))
temp=NAN;
else
temp=0.0;
x[i]=temp;
x[i+1]=temp;
i += inc_x;
j++;


}
}
} }
else else
{ {
@@ -260,7 +292,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
temp0 = -da_i * x[i+1]; temp0 = -da_i * x[i+1];
if (!isinf(x[i+1])) if (!isinf(x[i+1]))
x[i+1] = da_i * x[i]; x[i+1] = da_i * x[i];
else x[i+1] = NAN;
else x[i+1] = NAN;
x[i] = temp0; x[i] = temp0;
if (isnan(x[i+inc_x]) || isinf(x[i+inc_x])) if (isnan(x[i+inc_x]) || isinf(x[i+inc_x]))
temp1 = NAN; temp1 = NAN;
@@ -291,16 +323,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,


} }




} }


} }
else else
{ {



if ( da_i == 0.0 )
if ( da_i == 0.0 && dummy2)
{ {
BLASLONG n1 = n & -2; BLASLONG n1 = n & -2;


@@ -370,26 +399,27 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
alpha[1] = da_i; alpha[1] = da_i;


if ( da_r == 0.0 ) if ( da_r == 0.0 )
if ( da_i == 0 )
if ( da_i == 0 && !dummy2 )
zscal_kernel_8_zero(n1 , alpha , x); zscal_kernel_8_zero(n1 , alpha , x);
else else
// zscal_kernel_8_zero_r(n1 , alpha , x);
zscal_kernel_8(n1 , alpha , x); zscal_kernel_8(n1 , alpha , x);
else else
if ( da_i == 0 && da_r == da_r)
/* if ( da_i == 0 && da_r == da_r )
zscal_kernel_8_zero_i(n1 , alpha , x); zscal_kernel_8_zero_i(n1 , alpha , x);
else
else*/
zscal_kernel_8(n1 , alpha , x); zscal_kernel_8(n1 , alpha , x);
}
i = n1 << 1; i = n1 << 1;
j = n1; j = n1;
if ( da_r == 0.0 || da_r != da_r )
}
if ( da_r == 0.0 || isnan(da_r) )
{ {
if ( da_i == 0.0 ) if ( da_i == 0.0 )
{ {
FLOAT res=0.0;
if (da_r != da_r) res= da_r;
FLOAT res=0.0;
if (isnan(da_r)) res= da_r;
if (dummy2)
if (isnan(x[i])||isnan(x[i+1])) res= NAN;
while(j < n) while(j < n)
{ {
x[i]=res; x[i]=res;
@@ -412,7 +442,6 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,


} else } else
{ {

while(j < n) while(j < n)
{ {
temp0 = -da_i * x[i+1]; temp0 = -da_i * x[i+1];
@@ -421,7 +450,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
if (!isinf(x[i+1])) if (!isinf(x[i+1]))
x[i+1] = da_i * x[i]; x[i+1] = da_i * x[i];
else x[i+1] = NAN; else x[i+1] = NAN;
if ( x[i] == x[i]) //preserve NaN
if ( !isnan(x[i])) //preserve NaN
x[i] = temp0; x[i] = temp0;
i += 2 ; i += 2 ;
j++; j++;
@@ -437,8 +466,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
{ {
while(j < n) while(j < n)
{ {

temp0 = da_r * x[i]; temp0 = da_r * x[i];
if (isnan(x[i]))x[i+1]=NAN;
else
x[i+1] = da_r * x[i+1]; x[i+1] = da_r * x[i+1];
x[i] = temp0; x[i] = temp0;
i += 2 ; i += 2 ;
@@ -453,7 +483,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
{ {
temp0 = da_r * x[i] - da_i * x[i+1]; temp0 = da_r * x[i] - da_i * x[i+1];
x[i+1] = da_r * x[i+1] + da_i * x[i]; x[i+1] = da_r * x[i+1] + da_i * x[i];
x[i] = temp0;
if(!isnan(x[i]))x[i] = temp0;
i += 2 ; i += 2 ;
j++; j++;




Loading…
Cancel
Save