| @@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| BLASLONG max=0; | |||
| if (n <= 0 || inc_x <= 0) return(max); | |||
| if (n==1) return(1); | |||
| if (x[0]!=x[0]) return(1); | |||
| maxf=ABS(x[0]); | |||
| ix += inc_x; | |||
| i++; | |||
| while(i < n) | |||
| { | |||
| if (x[ix]!=x[ix]) return(i+1); | |||
| if( ABS(x[ix]) > maxf ) | |||
| { | |||
| max = i; | |||
| @@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| BLASLONG min=0; | |||
| if (n <= 0 || inc_x <= 0) return(min); | |||
| minf=ABS(x[0]); | |||
| if (n==1) return(1); | |||
| if (x[0]!=x[0]) return(1); | |||
| minf=ABS(x[0]); | |||
| ix += inc_x; | |||
| i++; | |||
| while(i < n) | |||
| { | |||
| if (x[ix]!=x[ix]) return(i+1); | |||
| if( ABS(x[ix]) < ABS(minf) ) | |||
| { | |||
| min = i; | |||
| @@ -1,5 +1,5 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2016, The OpenBLAS Project | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| @@ -25,6 +25,15 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : NoTest | |||
| * BLASTEST double : NoTest | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| @@ -47,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| BLASLONG max=0; | |||
| if (n <= 0 || inc_x <= 0) return(max); | |||
| if (n==1) return(1); | |||
| if (x[0]!=x[0]) return(1); | |||
| maxf=ABS(x[0]); | |||
| ix += inc_x; | |||
| i++; | |||
| while(i < n) | |||
| { | |||
| if (x[ix]!=x[ix]) return(i+1); | |||
| if( ABS(x[ix]) > maxf ) | |||
| { | |||
| max = i; | |||
| @@ -1,5 +1,5 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2016, The OpenBLAS Project | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| @@ -25,6 +25,15 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : NoTest | |||
| * BLASTEST double : NoTest | |||
| * CTEST : NoTest | |||
| * TEST : NoTest | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| @@ -47,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| BLASLONG min=0; | |||
| if (n <= 0 || inc_x <= 0) return(min); | |||
| minf=ABS(x[0]); | |||
| if (n==1) return(1); | |||
| if (x[0]!=x[0]) return(1); | |||
| minf=ABS(x[0]); | |||
| ix += inc_x; | |||
| i++; | |||
| while(i < n) | |||
| { | |||
| if (x[ix]!=x[ix]) return(i+1); | |||
| if( ABS(x[ix]) < ABS(minf) ) | |||
| { | |||
| min = i; | |||
| @@ -520,6 +520,19 @@ LL(1000): | |||
| .align 4 | |||
| LL(1010): | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f24, f24 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f25, f25 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f26, f26 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f27, f27 | |||
| bun cr0, LL(9999) | |||
| fabs f8, f24 | |||
| fabs f9, f25 | |||
| fabs f10, f26 | |||
| @@ -529,6 +542,20 @@ LL(1010): | |||
| LFD f25, 9 * SIZE(XX) | |||
| LFD f26, 10 * SIZE(XX) | |||
| LFD f27, 11 * SIZE(XX) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f24, f24 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f25, f25 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f26, f26 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f27, f27 | |||
| bun cr0, LL(9999) | |||
| subi RET, RET, 8 | |||
| fabs f12, f28 | |||
| fabs f13, f29 | |||
| @@ -577,6 +604,32 @@ LL(1010): | |||
| .align 4 | |||
| LL(1020): | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f24, f24 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f25, f25 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f26, f26 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f27, f27 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f28, f28 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f29, f29 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f30, f30 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f31, f31 | |||
| bun cr0, LL(9999) | |||
| subi RET, RET, 8 | |||
| fabs f8, f24 | |||
| fabs f9, f25 | |||
| fabs f10, f26 | |||
| @@ -631,8 +684,12 @@ LL(1050): | |||
| LL(1060): | |||
| // Tail loop, one element per pass: load it into f8 and step XX by one element. | |||
| LFD f8, 0 * SIZE(XX) | |||
| addi XX, XX, 1 * SIZE | |||
| // Count the element first, then compare it with itself. The compare is unordered | |||
| // only when f8 is NaN, so the branch leaves with RET already counting this element. | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f8, f8 | |||
| // Fixed: the branch must test cr0, the field written by fcmpu above ("cru" is not a CR field). | |||
| bun cr0, LL(9999) | |||
| fabs f8, f8 | |||
| // NOTE(review): RET was already incremented above and the next line is a commented-out | |||
| // copy of this instruction; this looks like pre- and post-change text interleaved. | |||
| // Confirm that only one increment per element survives. | |||
| addi RET, RET, 1 | |||
| //addi RET, RET, 1 | |||
| // Leave through LL(9999) when |x| equals f1; otherwise keep looping while CTR is non-zero. | |||
| fcmpu cr0, f1, f8 | |||
| beq cr0, LL(9999) | |||
| bdnz LL(1060) | |||
| @@ -658,6 +715,18 @@ LL(1100): | |||
| .align 4 | |||
| LL(1110): | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f24, f24 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f25, f25 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f26, f26 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f27, f27 | |||
| bun cr0, LL(9999) | |||
| fabs f8, f24 | |||
| fabs f9, f25 | |||
| fabs f10, f26 | |||
| @@ -667,7 +736,19 @@ LL(1110): | |||
| LFDUX f25, XX, INCX | |||
| LFDUX f26, XX, INCX | |||
| LFDUX f27, XX, INCX | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f24, f24 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f25, f25 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f26, f26 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f27, f27 | |||
| bun cr0, LL(9999) | |||
| subi RET, RET, 8 | |||
| fabs f12, f28 | |||
| fabs f13, f29 | |||
| fabs f14, f30 | |||
| @@ -714,6 +795,30 @@ LL(1110): | |||
| .align 4 | |||
| LL(1120): | |||
| // NaN pre-check for the eight values held in f24..f31. RET is advanced before each | |||
| // self-compare, so an unordered result (NaN) leaves through LL(9999) with RET counting | |||
| // that element. If none is NaN, RET is wound back by 8 before the magnitude pass. | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f24, f24 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f25, f25 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f26, f26 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f27, f27 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f28, f28 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f29, f29 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f30, f30 | |||
| bun cr0, LL(9999) | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f31, f31 | |||
| // Fixed: this branch was missing, so a NaN in f31 was compared but never acted on. | |||
| bun cr0, LL(9999) | |||
| subi RET, RET, 8 | |||
| fabs f8, f24 | |||
| fabs f9, f25 | |||
| fabs f10, f26 | |||
| @@ -765,8 +870,11 @@ LL(1150): | |||
| LL(1160): | |||
| // Tail loop for the INCX path, one element per pass. LFDUX loads from XX+INCX into f8 | |||
| // and writes that address back to XX. | |||
| LFDUX f8, XX, INCX | |||
| fabs f8, f8 | |||
| // Count the element, then compare it with itself; the result is unordered only for NaN. | |||
| addi RET, RET, 1 | |||
| fcmpu cr0, f8, f8 | |||
| // No CR field is named here; the extended mnemonic then tests cr0, which fcmpu just set. | |||
| bun LL(9999) | |||
| fabs f8, f8 | |||
| // addi RET, RET, 1 | |||
| // Leave through LL(9999) when |x| equals f1; otherwise keep looping while CTR is non-zero. | |||
| fcmpu cr0, f1, f8 | |||
| beq cr0, LL(9999) | |||
| bdnz LL(1160) | |||
| @@ -327,6 +327,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||
| BLASLONG max = 0; | |||
| if (n <= 0 || inc_x <= 0) return (max); | |||
| if (n == 1) return(1); | |||
| if (inc_x == 1) { | |||
| @@ -335,7 +336,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||
| BLASLONG n1 = n & -32; | |||
| if (n1 > 0) { | |||
| /* Look for a NaN in every element that is about to be handed to the kernel, not just | |||
| the first 32, and report its 1-based position. The counter is BLASLONG so that it | |||
| cannot truncate for large n1. | |||
| NOTE(review): assumes diamax_kernel_32 does no NaN handling of its own - confirm. */ | |||
| for (BLASLONG ii = i; ii < n1; ii++) if (x[ii] != x[ii]) return(ii+1); | |||
| max = diamax_kernel_32(n1, x, &maxf); | |||
| i = n1; | |||
| @@ -343,6 +344,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||
| #endif | |||
| #endif | |||
| while (i < n) { | |||
| if (x[i] != x[i]) return(i+1); | |||
| if (ABS(x[i]) > maxf) { | |||
| max = i; | |||
| maxf = ABS(x[i]); | |||
| @@ -356,6 +358,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||
| BLASLONG n1 = n & -4; | |||
| while (j < n1) { | |||
| /* NaN check for the four elements of this unrolled step. j is the 0-based element | |||
| index and i the strided offset into x, so element j+k must report j+k+1 (1-based), | |||
| matching the `return(j+1)` used by the remainder loop. */ | |||
| if (x[i] != x[i]) return(j+1); | |||
| if (x[i+inc_x] != x[i+inc_x]) return(j+2); | |||
| if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+3); | |||
| if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+4); | |||
| if (ABS(x[i]) > maxf) { | |||
| max = j; | |||
| maxf = ABS(x[i]); | |||
| @@ -381,6 +387,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||
| while (j < n) { | |||
| if (x[i] != x[i]) return(j+1); | |||
| if (ABS(x[i]) > maxf) { | |||
| max = j; | |||
| maxf = ABS(x[i]); | |||
| @@ -58,6 +58,78 @@ static BLASLONG siamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *maxf) { | |||
| register __vector float quadruple_values={0,0,0,0}; | |||
| register __vector float * v_ptrx=(__vector float *)x; | |||
| for(; i<n; i+=64){ | |||
| if (vec_any_nan(v_ptrx[0])) { | |||
| float d=vec_extract(v_ptrx[0],0); | |||
| if (d!=d) return(i+0); | |||
| d=vec_extract(v_ptrx[0],1); | |||
| if (d!=d) return(i+1); | |||
| d=vec_extract(v_ptrx[0],2); | |||
| if (d!=d) return(i+2); | |||
| return(i+3); | |||
| } | |||
| if (vec_any_nan(v_ptrx[1])) { | |||
| float d=vec_extract(v_ptrx[1],0); | |||
| if (d!=d) return(i+4+0); | |||
| d=vec_extract(v_ptrx[1],1); | |||
| if (d!=d) return(i+4+1); | |||
| d=vec_extract(v_ptrx[1],2); | |||
| if (d!=d) return(i+4+2); | |||
| return(i+4+3); | |||
| } | |||
| if (vec_any_nan(v_ptrx[2])) { | |||
| float d=vec_extract(v_ptrx[2],0); | |||
| if (d!=d) return(i+8+0); | |||
| d=vec_extract(v_ptrx[2],1); | |||
| if (d!=d) return(i+8+1); | |||
| d=vec_extract(v_ptrx[2],2); | |||
| if (d!=d) return(i+8+2); | |||
| return(i+8+3); | |||
| } | |||
| if (vec_any_nan(v_ptrx[3])) { | |||
| float d=vec_extract(v_ptrx[3],0); | |||
| if (d!=d) return(i+12+0); | |||
| d=vec_extract(v_ptrx[3],1); | |||
| if (d!=d) return(i+12+1); | |||
| d=vec_extract(v_ptrx[3],2); | |||
| if (d!=d) return(i+12+2); | |||
| return(i+12+3); | |||
| } | |||
| if (vec_any_nan(v_ptrx[4])) { | |||
| float d=vec_extract(v_ptrx[4],0); | |||
| if (d!=d) return(i+16+0); | |||
| d=vec_extract(v_ptrx[4],1); | |||
| if (d!=d) return(i+16+1); | |||
| d=vec_extract(v_ptrx[4],2); | |||
| if (d!=d) return(i+16+2); | |||
| return(i+16+3); | |||
| } | |||
| if (vec_any_nan(v_ptrx[5])) { | |||
| float d=vec_extract(v_ptrx[5],0); | |||
| if (d!=d) return(i+20+0); | |||
| d=vec_extract(v_ptrx[5],1); | |||
| if (d!=d) return(i+20+1); | |||
| d=vec_extract(v_ptrx[5],2); | |||
| if (d!=d) return(i+20+2); | |||
| return(i+20+3); | |||
| } | |||
| if (vec_any_nan(v_ptrx[6])) { | |||
| float d=vec_extract(v_ptrx[6],0); | |||
| if (d!=d) return(i+24+0); | |||
| d=vec_extract(v_ptrx[6],1); | |||
| if (d!=d) return(i+24+1); | |||
| d=vec_extract(v_ptrx[6],2); | |||
| if (d!=d) return(i+24+2); | |||
| return(i+24+3); | |||
| } | |||
| if (vec_any_nan(v_ptrx[7])) { | |||
| float d=vec_extract(v_ptrx[7],0); | |||
| if (d!=d) return(i+28+0); | |||
| d=vec_extract(v_ptrx[7],1); | |||
| if (d!=d) return(i+28+1); | |||
| d=vec_extract(v_ptrx[7],2); | |||
| if (d!=d) return(i+28+2); | |||
| return(i+28+3); | |||
| } | |||
| //absolute temporary vectors | |||
| register __vector float v0=vec_abs(v_ptrx[0]); | |||
| register __vector float v1=vec_abs(v_ptrx[1]); | |||
| @@ -226,7 +298,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||
| BLASLONG max = 0; | |||
| if (n <= 0 || inc_x <= 0) return (max); | |||
| if (x[0] != x[0]) return(1); | |||
| if (inc_x == 1) { | |||
| BLASLONG n1 = n & -64; | |||
| @@ -238,6 +310,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||
| } | |||
| while (i < n) { | |||
| if (x[i] != x[i]) return(i+1); | |||
| if (ABS(x[i]) > maxf) { | |||
| max = i; | |||
| maxf = ABS(x[i]); | |||
| @@ -251,18 +324,22 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||
| BLASLONG n1 = n & -4; | |||
| while (j < n1) { | |||
| /* Four elements per pass. j is the 0-based element index and i the strided offset. | |||
| A NaN ends the search at once with its 1-based position, so element j+k returns | |||
| j+k+1; the previous code returned one less for the second to fourth element. */ | |||
| if (x[i] != x[i]) return(j+1); | |||
| if (ABS(x[i]) > maxf) { | |||
| max = j; | |||
| maxf = ABS(x[i]); | |||
| } | |||
| if (x[i+inc_x] != x[i+inc_x]) return(j+2); | |||
| if (ABS(x[i + inc_x]) > maxf) { | |||
| max = j + 1; | |||
| maxf = ABS(x[i + inc_x]); | |||
| } | |||
| if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+3); | |||
| if (ABS(x[i + 2 * inc_x]) > maxf) { | |||
| max = j + 2; | |||
| maxf = ABS(x[i + 2 * inc_x]); | |||
| } | |||
| if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+4); | |||
| if (ABS(x[i + 3 * inc_x]) > maxf) { | |||
| max = j + 3; | |||
| maxf = ABS(x[i + 3 * inc_x]); | |||
| @@ -276,6 +353,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||
| while (j < n) { | |||
| if (x[i] != x[i]) return(j+1); | |||
| if (ABS(x[i]) > maxf) { | |||
| max = j; | |||
| maxf = ABS(x[i]); | |||
| @@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| BLASLONG max=0; | |||
| if (n <= 0 || inc_x <= 0) return(max); | |||
| if (n==1) return(1); | |||
| if (x[0]!=x[0]) return(1); | |||
| maxf=ABS(x[0]); | |||
| ix += inc_x; | |||
| i++; | |||
| while(i < n) | |||
| { | |||
| if (x[ix]!=x[ix]) return(i+1); | |||
| if( ABS(x[ix]) > maxf ) | |||
| { | |||
| max = i; | |||
| @@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| BLASLONG min=0; | |||
| if (n <= 0 || inc_x <= 0) return(min); | |||
| minf=ABS(x[0]); | |||
| if (n==1) return(1); | |||
| if (x[0]!=x[0]) return(1); | |||
| minf=ABS(x[0]); | |||
| ix += inc_x; | |||
| i++; | |||
| while(i < n) | |||
| { | |||
| if (x[ix]!=x[ix]) return(i+1); | |||
| if( ABS(x[ix]) < ABS(minf) ) | |||
| { | |||
| min = i; | |||
| @@ -100,6 +100,8 @@ | |||
| movl $1, RET | |||
| FLD (X) | |||
| fcomi %st, %st | |||
| jp .L999 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -121,6 +123,8 @@ | |||
| #endif | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -131,6 +135,8 @@ | |||
| incl NUM | |||
| FLD 1 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -141,6 +147,8 @@ | |||
| incl NUM | |||
| FLD 2 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -151,6 +159,8 @@ | |||
| incl NUM | |||
| FLD 3 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -161,6 +171,8 @@ | |||
| incl NUM | |||
| FLD 4 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -171,6 +183,8 @@ | |||
| incl NUM | |||
| FLD 5 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -181,6 +195,8 @@ | |||
| incl NUM | |||
| FLD 6 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -191,6 +207,8 @@ | |||
| incl NUM | |||
| FLD 7 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -215,6 +233,8 @@ | |||
| .L21: | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -238,6 +258,8 @@ | |||
| .L50: | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| addl INCX, X | |||
| #ifdef USE_ABS | |||
| fabs | |||
| @@ -249,6 +271,8 @@ | |||
| incl NUM | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| addl INCX, X | |||
| #ifdef USE_ABS | |||
| fabs | |||
| @@ -260,6 +284,8 @@ | |||
| incl NUM | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| addl INCX, X | |||
| #ifdef USE_ABS | |||
| fabs | |||
| @@ -271,6 +297,8 @@ | |||
| incl NUM | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| addl INCX, X | |||
| #ifdef USE_ABS | |||
| fabs | |||
| @@ -282,6 +310,8 @@ | |||
| incl NUM | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| addl INCX, X | |||
| #ifdef USE_ABS | |||
| fabs | |||
| @@ -293,6 +323,8 @@ | |||
| incl NUM | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| addl INCX, X | |||
| #ifdef USE_ABS | |||
| fabs | |||
| @@ -304,6 +336,8 @@ | |||
| incl NUM | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| addl INCX, X | |||
| #ifdef USE_ABS | |||
| fabs | |||
| @@ -315,6 +349,8 @@ | |||
| incl NUM | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| addl INCX, X | |||
| #ifdef USE_ABS | |||
| fabs | |||
| @@ -338,6 +374,8 @@ | |||
| .L61: | |||
| FLD 0 * SIZE(X) | |||
| fucomi | |||
| jp .L998 | |||
| #ifdef USE_ABS | |||
| fabs | |||
| #endif | |||
| @@ -361,4 +399,7 @@ | |||
| popl %ebp | |||
| ret | |||
| /* NaN exit. Reached through `jp .L998` placed after the fucomi compares: the parity | |||
| flag is set only when the compare is unordered, i.e. an operand is NaN. The current | |||
| NUM counter becomes the result, then control continues at .L999 (presumably the | |||
| shared exit path - confirm). */ | |||
| .L998: mov NUM, RET | |||
| jmp .L999 | |||
| EPILOGUE | |||
| @@ -93,6 +93,10 @@ | |||
| addq INCX, X | |||
| decq M | |||
| shufps $0, %xmm0, %xmm0 | |||
| incq RET | |||
| comiss %xmm0, %xmm0 | |||
| jp .L999 | |||
| decq RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm0 | |||
| #endif | |||
| @@ -254,6 +258,10 @@ | |||
| decq M | |||
| addq $SIZE, X | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| decq RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm1 | |||
| #endif | |||
| @@ -268,6 +276,14 @@ | |||
| movss 0 * SIZE(X), %xmm1 | |||
| movss 1 * SIZE(X), %xmm2 | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm2, %xmm2 | |||
| jp .L998 | |||
| decq RET | |||
| decq RET | |||
| subq $2, M | |||
| addq $2 * SIZE, X | |||
| @@ -332,6 +348,31 @@ | |||
| movss 5 * SIZE(X), %xmm6 | |||
| movss 6 * SIZE(X), %xmm7 | |||
| movss 7 * SIZE(X), %xmm8 | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm2, %xmm2 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm3, %xmm3 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm4, %xmm4 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm5, %xmm5 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm6, %xmm6 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm7, %xmm7 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm8, %xmm8 | |||
| jp .L998 | |||
| subq $8, RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm1 | |||
| andps %xmm15, %xmm2 | |||
| @@ -378,6 +419,19 @@ | |||
| movss 1 * SIZE(X), %xmm2 | |||
| movss 2 * SIZE(X), %xmm3 | |||
| movss 3 * SIZE(X), %xmm4 | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm2, %xmm2 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm3, %xmm3 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm4, %xmm4 | |||
| jp .L998 | |||
| subq $4, RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm1 | |||
| andps %xmm15, %xmm2 | |||
| @@ -405,6 +459,13 @@ | |||
| movss 0 * SIZE(X), %xmm1 | |||
| movss 1 * SIZE(X), %xmm2 | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm2, %xmm2 | |||
| jp .L998 | |||
| subq $2, RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm1 | |||
| andps %xmm15, %xmm2 | |||
| @@ -593,6 +654,31 @@ | |||
| movss 5 * SIZE(X), %xmm6 | |||
| movss 6 * SIZE(X), %xmm7 | |||
| movss 7 * SIZE(X), %xmm8 | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm2, %xmm2 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm3, %xmm3 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm4, %xmm4 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm5, %xmm5 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm6, %xmm6 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm7, %xmm7 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm8, %xmm8 | |||
| jp .L998 | |||
| subq $8, RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm1 | |||
| andps %xmm15, %xmm2 | |||
| @@ -639,6 +725,19 @@ | |||
| movss 1 * SIZE(X), %xmm2 | |||
| movss 2 * SIZE(X), %xmm3 | |||
| movss 3 * SIZE(X), %xmm4 | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm2, %xmm2 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm3, %xmm3 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm4, %xmm4 | |||
| jp .L998 | |||
| subq $4, RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm1 | |||
| andps %xmm15, %xmm2 | |||
| @@ -666,6 +765,13 @@ | |||
| movss 0 * SIZE(X), %xmm1 | |||
| movss 1 * SIZE(X), %xmm2 | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm2, %xmm2 | |||
| jp .L998 | |||
| subq $2, RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm1 | |||
| andps %xmm15, %xmm2 | |||
| @@ -885,6 +991,31 @@ | |||
| movss 0 * SIZE(X), %xmm2 | |||
| subq INCX, X | |||
| movss 0 * SIZE(X), %xmm1 | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm2, %xmm2 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm3, %xmm3 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm4, %xmm4 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm5, %xmm5 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm6, %xmm6 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm7, %xmm7 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm8, %xmm8 | |||
| jp .L998 | |||
| subq $8, RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm1 | |||
| andps %xmm15, %xmm2 | |||
| @@ -932,7 +1063,19 @@ | |||
| addq INCX, X | |||
| movss 0 * SIZE(X), %xmm4 | |||
| addq INCX, X | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm2, %xmm2 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm3, %xmm3 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm4, %xmm4 | |||
| jp .L998 | |||
| subq $4, RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm1 | |||
| andps %xmm15, %xmm2 | |||
| @@ -961,6 +1104,13 @@ | |||
| addq INCX, X | |||
| movss 0 * SIZE(X), %xmm2 | |||
| addq INCX, X | |||
| incq RET | |||
| comiss %xmm1, %xmm1 | |||
| jp .L998 | |||
| incq RET | |||
| comiss %xmm2, %xmm2 | |||
| jp .L998 | |||
| subq $2, RET | |||
| #ifdef USE_ABS | |||
| andps %xmm15, %xmm1 | |||
| andps %xmm15, %xmm2 | |||
| @@ -982,5 +1132,9 @@ | |||
| ret | |||
| // NaN exit. The visible `jp .L998` sites follow an `incq RET` / `comiss reg, reg` pair, | |||
| // so RET has already been advanced for the offending element when control arrives | |||
| // here; the increment below is therefore left disabled. | |||
| .L998: | |||
| // incq RET | |||
| jmp .L999 | |||
| EPILOGUE | |||
| @@ -79,6 +79,8 @@ | |||
| movsd (X), %xmm0 | |||
| addq INCX, X | |||
| decq M | |||
| comisd %xmm0, %xmm0 | |||
| jp .L987 | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm0 | |||
| #endif | |||
| @@ -269,6 +271,11 @@ | |||
| je .L21 | |||
| movsd 0 * SIZE(X), %xmm1 | |||
| incq RET | |||
| comisd %xmm1, %xmm1 | |||
| jp .L987 | |||
| decq RET | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm1 | |||
| #endif | |||
| @@ -342,6 +349,32 @@ | |||
| movsd 5 * SIZE(X), %xmm6 | |||
| movsd 6 * SIZE(X), %xmm7 | |||
| movsd 7 * SIZE(X), %xmm8 | |||
| incq RET | |||
| comisd %xmm1, %xmm1 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm2, %xmm2 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm3, %xmm3 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm4, %xmm4 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm5, %xmm5 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm6, %xmm6 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm7, %xmm7 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm8, %xmm8 | |||
| jp .L987 | |||
| subq $8, RET | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm1 | |||
| andpd %xmm15, %xmm2 | |||
| @@ -374,9 +407,9 @@ | |||
| comisd %xmm0, %xmm6 | |||
| je .L999 | |||
| incq RET | |||
| comisd %xmm0, %xmm7 | |||
| je .L999 | |||
| incq RET | |||
| comisd %xmm0, %xmm7 | |||
| je .L999 | |||
| incq RET | |||
| jmp .L999 | |||
| ALIGN_3 | |||
| @@ -388,6 +421,21 @@ | |||
| movsd 1 * SIZE(X), %xmm2 | |||
| movsd 2 * SIZE(X), %xmm3 | |||
| movsd 3 * SIZE(X), %xmm4 | |||
| incq RET | |||
| comisd %xmm1, %xmm1 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm2, %xmm2 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm3, %xmm3 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm4, %xmm4 | |||
| jp .L987 | |||
| subq $4, RET | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm1 | |||
| andpd %xmm15, %xmm2 | |||
| @@ -415,6 +463,14 @@ | |||
| movsd 0 * SIZE(X), %xmm1 | |||
| movsd 1 * SIZE(X), %xmm2 | |||
| incq RET | |||
| comisd %xmm1, %xmm1 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm2, %xmm2 | |||
| jp .L987 | |||
| subq $2, RET | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm1 | |||
| andpd %xmm15, %xmm2 | |||
| @@ -670,6 +726,32 @@ | |||
| movsd 5 * SIZE(X), %xmm6 | |||
| movsd 6 * SIZE(X), %xmm7 | |||
| movsd 7 * SIZE(X), %xmm8 | |||
| incq RET | |||
| comisd %xmm1, %xmm1 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm2, %xmm2 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm3, %xmm3 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm4, %xmm4 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm5, %xmm5 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm6, %xmm6 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm7, %xmm7 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm8, %xmm8 | |||
| jp .L987 | |||
| subq $8, RET | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm1 | |||
| andpd %xmm15, %xmm2 | |||
| @@ -716,6 +798,20 @@ | |||
| movsd 1 * SIZE(X), %xmm2 | |||
| movsd 2 * SIZE(X), %xmm3 | |||
| movsd 3 * SIZE(X), %xmm4 | |||
| incq RET | |||
| comisd %xmm1, %xmm1 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm2, %xmm2 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm3, %xmm3 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm4, %xmm4 | |||
| jp .L987 | |||
| subq $4, RET | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm1 | |||
| andpd %xmm15, %xmm2 | |||
| @@ -743,11 +839,21 @@ | |||
| movsd 0 * SIZE(X), %xmm1 | |||
| movsd 1 * SIZE(X), %xmm2 | |||
| incq RET | |||
| comisd %xmm1, %xmm1 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm2, %xmm2 | |||
| jp .L987 | |||
| subq $2, RET | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm1 | |||
| andpd %xmm15, %xmm2 | |||
| #endif | |||
| addq $2 * SIZE, X | |||
| comisd %xmm0, %xmm0 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm0, %xmm1 | |||
| je .L999 | |||
| @@ -962,6 +1068,7 @@ | |||
| ALIGN_4 | |||
| .L92: | |||
| movsd 0 * SIZE(X), %xmm1 | |||
| addq INCX, X | |||
| movhpd 0 * SIZE(X), %xmm1 | |||
| @@ -1033,6 +1140,32 @@ | |||
| movsd 0 * SIZE(X), %xmm2 | |||
| subq INCX, X | |||
| movsd 0 * SIZE(X), %xmm1 | |||
| incq RET | |||
| comisd %xmm1, %xmm1 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm2, %xmm2 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm3, %xmm3 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm4, %xmm4 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm5, %xmm5 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm6, %xmm6 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm7, %xmm7 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm8, %xmm8 | |||
| jp .L987 | |||
| subq $8, RET | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm1 | |||
| andpd %xmm15, %xmm2 | |||
| @@ -1083,6 +1216,20 @@ | |||
| addq INCX, X | |||
| movsd 0 * SIZE(X), %xmm4 | |||
| addq INCX, X | |||
| incq RET | |||
| comisd %xmm1, %xmm1 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm2, %xmm2 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm3, %xmm3 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm4, %xmm4 | |||
| jp .L987 | |||
| subq $4, RET | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm1 | |||
| andpd %xmm15, %xmm2 | |||
| @@ -1111,6 +1258,14 @@ | |||
| addq INCX, X | |||
| movsd 0 * SIZE(X), %xmm2 | |||
| addq INCX, X | |||
| incq RET | |||
| comisd %xmm1, %xmm1 | |||
| jp .L987 | |||
| incq RET | |||
| comisd %xmm2, %xmm2 | |||
| jp .L987 | |||
| decq RET | |||
| decq RET | |||
| #ifdef USE_ABS | |||
| andpd %xmm15, %xmm1 | |||
| andpd %xmm15, %xmm2 | |||
| @@ -1122,7 +1277,6 @@ | |||
| comisd %xmm0, %xmm2 | |||
| je .L999 | |||
| ALIGN_3 | |||
| .L98: | |||
| incq RET | |||
| ALIGN_3 | |||
| @@ -1132,5 +1286,8 @@ | |||
| ret | |||
| EPILOGUE | |||
| // NaN exit for the `comisd reg, reg` self-compares: add one to RET, then leave via .L999. | |||
| // NOTE(review): most `jp .L987` sites already run `incq RET` before the compare, while | |||
| // the very first check (after `movsd (X), %xmm0`) does not - confirm this extra | |||
| // increment gives the intended position on both kinds of path. | |||
| .L987: | |||
| incq RET //count from xmm0 | |||
| jmp .L999 | |||
| EPILOGUE | |||
| @@ -58,3 +58,29 @@ CTEST(amax, damax){ | |||
| } | |||
| #endif | |||
| #ifdef BUILD_SINGLE | |||
| CTEST(amax, isamax_nan){ | |||
| /* Three contiguous floats whose first NaN sits in the second slot; the test | |||
| expects ISAMAX to answer 2, the 1-based position of that NaN. */ | |||
| float data[]={1., 0./0., 0./0. }; | |||
| blasint count=3, stride=1; | |||
| int expected=2; | |||
| int got=0; | |||
| got=BLASFUNC(isamax)(&count, data, &stride); | |||
| ASSERT_EQUAL(expected, got); | |||
| } | |||
| #endif | |||
| #ifdef BUILD_DOUBLE | |||
| CTEST(amax, idamax_nan){ | |||
| /* Four contiguous doubles that start with a NaN; the test expects IDAMAX to | |||
| answer 1, the 1-based position of that leading NaN. */ | |||
| double data[]={ 0./0., 1.,2. ,3.}; | |||
| blasint count=4, stride=1; | |||
| int expected=1; | |||
| int got=0; | |||
| got=BLASFUNC(idamax)(&count, data, &stride); | |||
| ASSERT_EQUAL(expected, got); | |||
| } | |||
| #endif | |||