| @@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| BLASLONG max=0; | BLASLONG max=0; | ||||
| if (n <= 0 || inc_x <= 0) return(max); | if (n <= 0 || inc_x <= 0) return(max); | ||||
| if (n==1) return(1); | |||||
| if (x[0]!=x[0]) return(1); | |||||
| maxf=ABS(x[0]); | maxf=ABS(x[0]); | ||||
| ix += inc_x; | ix += inc_x; | ||||
| i++; | i++; | ||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| if (x[ix]!=x[ix]) return(i+1); | |||||
| if( ABS(x[ix]) > maxf ) | if( ABS(x[ix]) > maxf ) | ||||
| { | { | ||||
| max = i; | max = i; | ||||
| @@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| BLASLONG min=0; | BLASLONG min=0; | ||||
| if (n <= 0 || inc_x <= 0) return(min); | if (n <= 0 || inc_x <= 0) return(min); | ||||
| minf=ABS(x[0]); | |||||
| if (n==1) return(1); | |||||
| if (x[0]!=x[0]) return(1); | |||||
| minf=ABS(x[0]); | |||||
| ix += inc_x; | ix += inc_x; | ||||
| i++; | i++; | ||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| if (x[ix]!=x[ix]) return(i+1); | |||||
| if( ABS(x[ix]) < ABS(minf) ) | if( ABS(x[ix]) < ABS(minf) ) | ||||
| { | { | ||||
| min = i; | min = i; | ||||
| @@ -1,5 +1,5 @@ | |||||
| /*************************************************************************** | /*************************************************************************** | ||||
| Copyright (c) 2016, The OpenBLAS Project | |||||
| Copyright (c) 2013, The OpenBLAS Project | |||||
| All rights reserved. | All rights reserved. | ||||
| Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are | modification, are permitted provided that the following conditions are | ||||
| @@ -25,6 +25,15 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| /************************************************************************************** | |||||
| * 2013/09/14 Saar | |||||
| * BLASTEST float : NoTest | |||||
| * BLASTEST double : NoTest | |||||
| * CTEST : OK | |||||
| * TEST : OK | |||||
| * | |||||
| **************************************************************************************/ | |||||
| #include "common.h" | #include "common.h" | ||||
| #include <math.h> | #include <math.h> | ||||
| @@ -47,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| BLASLONG max=0; | BLASLONG max=0; | ||||
| if (n <= 0 || inc_x <= 0) return(max); | if (n <= 0 || inc_x <= 0) return(max); | ||||
| if (n==1) return(1); | |||||
| if (x[0]!=x[0]) return(1); | |||||
| maxf=ABS(x[0]); | maxf=ABS(x[0]); | ||||
| ix += inc_x; | ix += inc_x; | ||||
| i++; | i++; | ||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| if (x[ix]!=x[ix]) return(i+1); | |||||
| if( ABS(x[ix]) > maxf ) | if( ABS(x[ix]) > maxf ) | ||||
| { | { | ||||
| max = i; | max = i; | ||||
| @@ -1,5 +1,5 @@ | |||||
| /*************************************************************************** | /*************************************************************************** | ||||
| Copyright (c) 2016, The OpenBLAS Project | |||||
| Copyright (c) 2013, The OpenBLAS Project | |||||
| All rights reserved. | All rights reserved. | ||||
| Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are | modification, are permitted provided that the following conditions are | ||||
| @@ -25,6 +25,15 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| /************************************************************************************** | |||||
| * 2013/09/14 Saar | |||||
| * BLASTEST float : NoTest | |||||
| * BLASTEST double : NoTest | |||||
| * CTEST : NoTest | |||||
| * TEST : NoTest | |||||
| * | |||||
| **************************************************************************************/ | |||||
| #include "common.h" | #include "common.h" | ||||
| #include <math.h> | #include <math.h> | ||||
| @@ -47,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| BLASLONG min=0; | BLASLONG min=0; | ||||
| if (n <= 0 || inc_x <= 0) return(min); | if (n <= 0 || inc_x <= 0) return(min); | ||||
| minf=ABS(x[0]); | |||||
| if (n==1) return(1); | |||||
| if (x[0]!=x[0]) return(1); | |||||
| minf=ABS(x[0]); | |||||
| ix += inc_x; | ix += inc_x; | ||||
| i++; | i++; | ||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| if (x[ix]!=x[ix]) return(i+1); | |||||
| if( ABS(x[ix]) < ABS(minf) ) | if( ABS(x[ix]) < ABS(minf) ) | ||||
| { | { | ||||
| min = i; | min = i; | ||||
| @@ -520,6 +520,19 @@ LL(1000): | |||||
| .align 4 | .align 4 | ||||
| LL(1010): | LL(1010): | ||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f24, f24 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f25, f25 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f26, f26 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f27, f27 | |||||
| bun cr0, LL(9999) | |||||
| fabs f8, f24 | fabs f8, f24 | ||||
| fabs f9, f25 | fabs f9, f25 | ||||
| fabs f10, f26 | fabs f10, f26 | ||||
| @@ -529,6 +542,20 @@ LL(1010): | |||||
| LFD f25, 9 * SIZE(XX) | LFD f25, 9 * SIZE(XX) | ||||
| LFD f26, 10 * SIZE(XX) | LFD f26, 10 * SIZE(XX) | ||||
| LFD f27, 11 * SIZE(XX) | LFD f27, 11 * SIZE(XX) | ||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f24, f24 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f25, f25 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f26, f26 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f27, f27 | |||||
| bun cr0, LL(9999) | |||||
| subi RET, RET, 8 | |||||
| fabs f12, f28 | fabs f12, f28 | ||||
| fabs f13, f29 | fabs f13, f29 | ||||
| @@ -577,6 +604,32 @@ LL(1010): | |||||
| .align 4 | .align 4 | ||||
| LL(1020): | LL(1020): | ||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f24, f24 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f25, f25 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f26, f26 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f27, f27 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f28, f28 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f29, f29 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f30, f30 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f31, f31 | |||||
| bun cr0, LL(9999) | |||||
| subi RET, RET, 8 | |||||
| fabs f8, f24 | fabs f8, f24 | ||||
| fabs f9, f25 | fabs f9, f25 | ||||
| fabs f10, f26 | fabs f10, f26 | ||||
| @@ -631,8 +684,12 @@ LL(1050): | |||||
| LL(1060): | LL(1060): | ||||
| LFD f8, 0 * SIZE(XX) | LFD f8, 0 * SIZE(XX) | ||||
| addi XX, XX, 1 * SIZE | addi XX, XX, 1 * SIZE | ||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f8, f8 | |||||
| bun cr0, LL(9999) // f8 unordered with itself (NaN): branch out with RET already advanced | |||||
| fabs f8, f8 | fabs f8, f8 | ||||
| addi RET, RET, 1 | |||||
| //addi RET, RET, 1 | |||||
| fcmpu cr0, f1, f8 | fcmpu cr0, f1, f8 | ||||
| beq cr0, LL(9999) | beq cr0, LL(9999) | ||||
| bdnz LL(1060) | bdnz LL(1060) | ||||
| @@ -658,6 +715,18 @@ LL(1100): | |||||
| .align 4 | .align 4 | ||||
| LL(1110): | LL(1110): | ||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f24, f24 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f25, f25 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f26, f26 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f27, f27 | |||||
| bun cr0, LL(9999) | |||||
| fabs f8, f24 | fabs f8, f24 | ||||
| fabs f9, f25 | fabs f9, f25 | ||||
| fabs f10, f26 | fabs f10, f26 | ||||
| @@ -667,7 +736,19 @@ LL(1110): | |||||
| LFDUX f25, XX, INCX | LFDUX f25, XX, INCX | ||||
| LFDUX f26, XX, INCX | LFDUX f26, XX, INCX | ||||
| LFDUX f27, XX, INCX | LFDUX f27, XX, INCX | ||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f24, f24 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f25, f25 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f26, f26 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f27, f27 | |||||
| bun cr0, LL(9999) | |||||
| subi RET, RET, 8 | |||||
| fabs f12, f28 | fabs f12, f28 | ||||
| fabs f13, f29 | fabs f13, f29 | ||||
| fabs f14, f30 | fabs f14, f30 | ||||
| @@ -714,6 +795,30 @@ LL(1110): | |||||
| .align 4 | .align 4 | ||||
| LL(1120): | LL(1120): | ||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f24, f24 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f25, f25 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f26, f26 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f27, f27 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f28, f28 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f29, f29 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f30, f30 | |||||
| bun cr0, LL(9999) | |||||
| addi RET, RET, 1 | |||||
| fcmpu cr0, f31, f31 | |||||
| bun cr0, LL(9999) // was missing: without it the f31 self-compare above had no effect | |||||
| subi RET, RET, 8 | |||||
| fabs f8, f24 | fabs f8, f24 | ||||
| fabs f9, f25 | fabs f9, f25 | ||||
| fabs f10, f26 | fabs f10, f26 | ||||
| @@ -765,8 +870,11 @@ LL(1150): | |||||
| LL(1160): | LL(1160): | ||||
| LFDUX f8, XX, INCX | LFDUX f8, XX, INCX | ||||
| fabs f8, f8 | |||||
| addi RET, RET, 1 | addi RET, RET, 1 | ||||
| fcmpu cr0, f8, f8 | |||||
| bun LL(9999) | |||||
| fabs f8, f8 | |||||
| // addi RET, RET, 1 | |||||
| fcmpu cr0, f1, f8 | fcmpu cr0, f1, f8 | ||||
| beq cr0, LL(9999) | beq cr0, LL(9999) | ||||
| bdnz LL(1160) | bdnz LL(1160) | ||||
| @@ -327,6 +327,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| BLASLONG max = 0; | BLASLONG max = 0; | ||||
| if (n <= 0 || inc_x <= 0) return (max); | if (n <= 0 || inc_x <= 0) return (max); | ||||
| if (n == 1) return(1); | |||||
| if (inc_x == 1) { | if (inc_x == 1) { | ||||
| @@ -335,7 +336,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| BLASLONG n1 = n & -32; | BLASLONG n1 = n & -32; | ||||
| if (n1 > 0) { | if (n1 > 0) { | ||||
| for (int ii=i;ii<i+32;ii++) if (x[ii]!=x[ii]) return(ii+1); | |||||
| max = diamax_kernel_32(n1, x, &maxf); | max = diamax_kernel_32(n1, x, &maxf); | ||||
| i = n1; | i = n1; | ||||
| @@ -343,6 +344,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| while (i < n) { | while (i < n) { | ||||
| if (x[i] != x[i]) return(i+1); | |||||
| if (ABS(x[i]) > maxf) { | if (ABS(x[i]) > maxf) { | ||||
| max = i; | max = i; | ||||
| maxf = ABS(x[i]); | maxf = ABS(x[i]); | ||||
| @@ -356,6 +358,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| BLASLONG n1 = n & -4; | BLASLONG n1 = n & -4; | ||||
| while (j < n1) { | while (j < n1) { | ||||
| if (x[i] != x[i]) return(j+1); /* NaN at element j: i is the strided offset, j the element count */ | |||||
| if (x[i+inc_x] != x[i+inc_x]) return(j+2); /* NaN at element j+1 */ | |||||
| if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+3); /* NaN at element j+2 */ | |||||
| if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+4); /* NaN at element j+3 */ | |||||
| if (ABS(x[i]) > maxf) { | if (ABS(x[i]) > maxf) { | ||||
| max = j; | max = j; | ||||
| maxf = ABS(x[i]); | maxf = ABS(x[i]); | ||||
| @@ -381,6 +387,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| while (j < n) { | while (j < n) { | ||||
| if (x[i] != x[i]) return(j+1); | |||||
| if (ABS(x[i]) > maxf) { | if (ABS(x[i]) > maxf) { | ||||
| max = j; | max = j; | ||||
| maxf = ABS(x[i]); | maxf = ABS(x[i]); | ||||
| @@ -58,6 +58,78 @@ static BLASLONG siamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *maxf) { | |||||
| register __vector float quadruple_values={0,0,0,0}; | register __vector float quadruple_values={0,0,0,0}; | ||||
| register __vector float * v_ptrx=(__vector float *)x; | register __vector float * v_ptrx=(__vector float *)x; | ||||
| for(; i<n; i+=64){ | for(; i<n; i+=64){ | ||||
| if (vec_any_nan(v_ptrx[0])) { | |||||
| float d=vec_extract(v_ptrx[0],0); | |||||
| if (d!=d) return(i+0); | |||||
| d=vec_extract(v_ptrx[0],1); | |||||
| if (d!=d) return(i+1); | |||||
| d=vec_extract(v_ptrx[0],2); | |||||
| if (d!=d) return(i+2); | |||||
| return(i+3); | |||||
| } | |||||
| if (vec_any_nan(v_ptrx[1])) { | |||||
| float d=vec_extract(v_ptrx[1],0); | |||||
| if (d!=d) return(i+4+0); | |||||
| d=vec_extract(v_ptrx[1],1); | |||||
| if (d!=d) return(i+4+1); | |||||
| d=vec_extract(v_ptrx[1],2); | |||||
| if (d!=d) return(i+4+2); | |||||
| return(i+4+3); | |||||
| } | |||||
| if (vec_any_nan(v_ptrx[2])) { | |||||
| float d=vec_extract(v_ptrx[2],0); | |||||
| if (d!=d) return(i+8+0); | |||||
| d=vec_extract(v_ptrx[2],1); | |||||
| if (d!=d) return(i+8+1); | |||||
| d=vec_extract(v_ptrx[2],2); | |||||
| if (d!=d) return(i+8+2); | |||||
| return(i+8+3); | |||||
| } | |||||
| if (vec_any_nan(v_ptrx[3])) { | |||||
| float d=vec_extract(v_ptrx[3],0); | |||||
| if (d!=d) return(i+12+0); | |||||
| d=vec_extract(v_ptrx[3],1); | |||||
| if (d!=d) return(i+12+1); | |||||
| d=vec_extract(v_ptrx[3],2); | |||||
| if (d!=d) return(i+12+2); | |||||
| return(i+12+3); | |||||
| } | |||||
| if (vec_any_nan(v_ptrx[4])) { | |||||
| float d=vec_extract(v_ptrx[4],0); | |||||
| if (d!=d) return(i+16+0); | |||||
| d=vec_extract(v_ptrx[4],1); | |||||
| if (d!=d) return(i+16+1); | |||||
| d=vec_extract(v_ptrx[4],2); | |||||
| if (d!=d) return(i+16+2); | |||||
| return(i+16+3); | |||||
| } | |||||
| if (vec_any_nan(v_ptrx[5])) { | |||||
| float d=vec_extract(v_ptrx[5],0); | |||||
| if (d!=d) return(i+20+0); | |||||
| d=vec_extract(v_ptrx[5],1); | |||||
| if (d!=d) return(i+20+1); | |||||
| d=vec_extract(v_ptrx[5],2); | |||||
| if (d!=d) return(i+20+2); | |||||
| return(i+20+3); | |||||
| } | |||||
| if (vec_any_nan(v_ptrx[6])) { | |||||
| float d=vec_extract(v_ptrx[6],0); | |||||
| if (d!=d) return(i+24+0); | |||||
| d=vec_extract(v_ptrx[6],1); | |||||
| if (d!=d) return(i+24+1); | |||||
| d=vec_extract(v_ptrx[6],2); | |||||
| if (d!=d) return(i+24+2); | |||||
| return(i+24+3); | |||||
| } | |||||
| if (vec_any_nan(v_ptrx[7])) { | |||||
| float d=vec_extract(v_ptrx[7],0); | |||||
| if (d!=d) return(i+28+0); | |||||
| d=vec_extract(v_ptrx[7],1); | |||||
| if (d!=d) return(i+28+1); | |||||
| d=vec_extract(v_ptrx[7],2); | |||||
| if (d!=d) return(i+28+2); | |||||
| return(i+28+3); | |||||
| } | |||||
| //absolute temporary vectors | //absolute temporary vectors | ||||
| register __vector float v0=vec_abs(v_ptrx[0]); | register __vector float v0=vec_abs(v_ptrx[0]); | ||||
| register __vector float v1=vec_abs(v_ptrx[1]); | register __vector float v1=vec_abs(v_ptrx[1]); | ||||
| @@ -226,7 +298,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| BLASLONG max = 0; | BLASLONG max = 0; | ||||
| if (n <= 0 || inc_x <= 0) return (max); | if (n <= 0 || inc_x <= 0) return (max); | ||||
| if (x[0] != x[0]) return(1); | |||||
| if (inc_x == 1) { | if (inc_x == 1) { | ||||
| BLASLONG n1 = n & -64; | BLASLONG n1 = n & -64; | ||||
| @@ -238,6 +310,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| } | } | ||||
| while (i < n) { | while (i < n) { | ||||
| if (x[i] != x[i]) return(i+1); | |||||
| if (ABS(x[i]) > maxf) { | if (ABS(x[i]) > maxf) { | ||||
| max = i; | max = i; | ||||
| maxf = ABS(x[i]); | maxf = ABS(x[i]); | ||||
| @@ -251,18 +324,22 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| BLASLONG n1 = n & -4; | BLASLONG n1 = n & -4; | ||||
| while (j < n1) { | while (j < n1) { | ||||
| if (x[i] != x[i]) return(j+1); /* NaN at element j: return its 1-based position */ | |||||
| if (ABS(x[i]) > maxf) { | if (ABS(x[i]) > maxf) { | ||||
| max = j; | max = j; | ||||
| maxf = ABS(x[i]); | maxf = ABS(x[i]); | ||||
| } | } | ||||
| if (x[i+inc_x] != x[i+inc_x]) return(j+2); /* NaN at element j+1 */ | |||||
| if (ABS(x[i + inc_x]) > maxf) { | if (ABS(x[i + inc_x]) > maxf) { | ||||
| max = j + 1; | max = j + 1; | ||||
| maxf = ABS(x[i + inc_x]); | maxf = ABS(x[i + inc_x]); | ||||
| } | } | ||||
| if (x[i+2*inc_x] != x[i+2*inc_x]) return(j+3); /* NaN at element j+2 */ | |||||
| if (ABS(x[i + 2 * inc_x]) > maxf) { | if (ABS(x[i + 2 * inc_x]) > maxf) { | ||||
| max = j + 2; | max = j + 2; | ||||
| maxf = ABS(x[i + 2 * inc_x]); | maxf = ABS(x[i + 2 * inc_x]); | ||||
| } | } | ||||
| if (x[i+3*inc_x] != x[i+3*inc_x]) return(j+4); /* NaN at element j+3 */ | |||||
| if (ABS(x[i + 3 * inc_x]) > maxf) { | if (ABS(x[i + 3 * inc_x]) > maxf) { | ||||
| max = j + 3; | max = j + 3; | ||||
| maxf = ABS(x[i + 3 * inc_x]); | maxf = ABS(x[i + 3 * inc_x]); | ||||
| @@ -276,6 +353,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| while (j < n) { | while (j < n) { | ||||
| if (x[i] != x[i]) return(j+1); | |||||
| if (ABS(x[i]) > maxf) { | if (ABS(x[i]) > maxf) { | ||||
| max = j; | max = j; | ||||
| maxf = ABS(x[i]); | maxf = ABS(x[i]); | ||||
| @@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| BLASLONG max=0; | BLASLONG max=0; | ||||
| if (n <= 0 || inc_x <= 0) return(max); | if (n <= 0 || inc_x <= 0) return(max); | ||||
| if (n==1) return(1); | |||||
| if (x[0]!=x[0]) return(1); | |||||
| maxf=ABS(x[0]); | maxf=ABS(x[0]); | ||||
| ix += inc_x; | ix += inc_x; | ||||
| i++; | i++; | ||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| if (x[ix]!=x[ix]) return(i+1); | |||||
| if( ABS(x[ix]) > maxf ) | if( ABS(x[ix]) > maxf ) | ||||
| { | { | ||||
| max = i; | max = i; | ||||
| @@ -56,13 +56,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| BLASLONG min=0; | BLASLONG min=0; | ||||
| if (n <= 0 || inc_x <= 0) return(min); | if (n <= 0 || inc_x <= 0) return(min); | ||||
| minf=ABS(x[0]); | |||||
| if (n==1) return(1); | |||||
| if (x[0]!=x[0]) return(1); | |||||
| minf=ABS(x[0]); | |||||
| ix += inc_x; | ix += inc_x; | ||||
| i++; | i++; | ||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||
| if (x[ix]!=x[ix]) return(i+1); | |||||
| if( ABS(x[ix]) < ABS(minf) ) | if( ABS(x[ix]) < ABS(minf) ) | ||||
| { | { | ||||
| min = i; | min = i; | ||||
| @@ -100,6 +100,8 @@ | |||||
| movl $1, RET | movl $1, RET | ||||
| FLD (X) | FLD (X) | ||||
| fcomi %st, %st | |||||
| jp .L999 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -121,6 +123,8 @@ | |||||
| #endif | #endif | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -131,6 +135,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 1 * SIZE(X) | FLD 1 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -141,6 +147,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 2 * SIZE(X) | FLD 2 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -151,6 +159,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 3 * SIZE(X) | FLD 3 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -161,6 +171,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 4 * SIZE(X) | FLD 4 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -171,6 +183,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 5 * SIZE(X) | FLD 5 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -181,6 +195,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 6 * SIZE(X) | FLD 6 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -191,6 +207,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 7 * SIZE(X) | FLD 7 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -215,6 +233,8 @@ | |||||
| .L21: | .L21: | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -238,6 +258,8 @@ | |||||
| .L50: | .L50: | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| addl INCX, X | addl INCX, X | ||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| @@ -249,6 +271,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| addl INCX, X | addl INCX, X | ||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| @@ -260,6 +284,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| addl INCX, X | addl INCX, X | ||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| @@ -271,6 +297,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| addl INCX, X | addl INCX, X | ||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| @@ -282,6 +310,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| addl INCX, X | addl INCX, X | ||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| @@ -293,6 +323,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| addl INCX, X | addl INCX, X | ||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| @@ -304,6 +336,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| addl INCX, X | addl INCX, X | ||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| @@ -315,6 +349,8 @@ | |||||
| incl NUM | incl NUM | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| addl INCX, X | addl INCX, X | ||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| @@ -338,6 +374,8 @@ | |||||
| .L61: | .L61: | ||||
| FLD 0 * SIZE(X) | FLD 0 * SIZE(X) | ||||
| fucomi | |||||
| jp .L998 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| fabs | fabs | ||||
| #endif | #endif | ||||
| @@ -361,4 +399,7 @@ | |||||
| popl %ebp | popl %ebp | ||||
| ret | ret | ||||
| .L998: mov NUM, RET | |||||
| jmp .L999 | |||||
| EPILOGUE | EPILOGUE | ||||
| @@ -93,6 +93,10 @@ | |||||
| addq INCX, X | addq INCX, X | ||||
| decq M | decq M | ||||
| shufps $0, %xmm0, %xmm0 | shufps $0, %xmm0, %xmm0 | ||||
| incq RET | |||||
| comiss %xmm0, %xmm0 | |||||
| jp .L999 | |||||
| decq RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm0 | andps %xmm15, %xmm0 | ||||
| #endif | #endif | ||||
| @@ -254,6 +258,10 @@ | |||||
| decq M | decq M | ||||
| addq $SIZE, X | addq $SIZE, X | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| decq RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm1 | andps %xmm15, %xmm1 | ||||
| #endif | #endif | ||||
| @@ -268,6 +276,14 @@ | |||||
| movss 0 * SIZE(X), %xmm1 | movss 0 * SIZE(X), %xmm1 | ||||
| movss 1 * SIZE(X), %xmm2 | movss 1 * SIZE(X), %xmm2 | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm2, %xmm2 | |||||
| jp .L998 | |||||
| decq RET | |||||
| decq RET | |||||
| subq $2, M | subq $2, M | ||||
| addq $2 * SIZE, X | addq $2 * SIZE, X | ||||
| @@ -332,6 +348,31 @@ | |||||
| movss 5 * SIZE(X), %xmm6 | movss 5 * SIZE(X), %xmm6 | ||||
| movss 6 * SIZE(X), %xmm7 | movss 6 * SIZE(X), %xmm7 | ||||
| movss 7 * SIZE(X), %xmm8 | movss 7 * SIZE(X), %xmm8 | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm2, %xmm2 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm3, %xmm3 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm4, %xmm4 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm5, %xmm5 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm6, %xmm6 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm7, %xmm7 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm8, %xmm8 | |||||
| jp .L998 | |||||
| subq $8, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm1 | andps %xmm15, %xmm1 | ||||
| andps %xmm15, %xmm2 | andps %xmm15, %xmm2 | ||||
| @@ -378,6 +419,19 @@ | |||||
| movss 1 * SIZE(X), %xmm2 | movss 1 * SIZE(X), %xmm2 | ||||
| movss 2 * SIZE(X), %xmm3 | movss 2 * SIZE(X), %xmm3 | ||||
| movss 3 * SIZE(X), %xmm4 | movss 3 * SIZE(X), %xmm4 | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm2, %xmm2 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm3, %xmm3 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm4, %xmm4 | |||||
| jp .L998 | |||||
| subq $4, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm1 | andps %xmm15, %xmm1 | ||||
| andps %xmm15, %xmm2 | andps %xmm15, %xmm2 | ||||
| @@ -405,6 +459,13 @@ | |||||
| movss 0 * SIZE(X), %xmm1 | movss 0 * SIZE(X), %xmm1 | ||||
| movss 1 * SIZE(X), %xmm2 | movss 1 * SIZE(X), %xmm2 | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm2, %xmm2 | |||||
| jp .L998 | |||||
| subq $2, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm1 | andps %xmm15, %xmm1 | ||||
| andps %xmm15, %xmm2 | andps %xmm15, %xmm2 | ||||
| @@ -593,6 +654,31 @@ | |||||
| movss 5 * SIZE(X), %xmm6 | movss 5 * SIZE(X), %xmm6 | ||||
| movss 6 * SIZE(X), %xmm7 | movss 6 * SIZE(X), %xmm7 | ||||
| movss 7 * SIZE(X), %xmm8 | movss 7 * SIZE(X), %xmm8 | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm2, %xmm2 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm3, %xmm3 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm4, %xmm4 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm5, %xmm5 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm6, %xmm6 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm7, %xmm7 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm8, %xmm8 | |||||
| jp .L998 | |||||
| subq $8, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm1 | andps %xmm15, %xmm1 | ||||
| andps %xmm15, %xmm2 | andps %xmm15, %xmm2 | ||||
| @@ -639,6 +725,19 @@ | |||||
| movss 1 * SIZE(X), %xmm2 | movss 1 * SIZE(X), %xmm2 | ||||
| movss 2 * SIZE(X), %xmm3 | movss 2 * SIZE(X), %xmm3 | ||||
| movss 3 * SIZE(X), %xmm4 | movss 3 * SIZE(X), %xmm4 | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm2, %xmm2 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm3, %xmm3 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm4, %xmm4 | |||||
| jp .L998 | |||||
| subq $4, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm1 | andps %xmm15, %xmm1 | ||||
| andps %xmm15, %xmm2 | andps %xmm15, %xmm2 | ||||
| @@ -666,6 +765,13 @@ | |||||
| movss 0 * SIZE(X), %xmm1 | movss 0 * SIZE(X), %xmm1 | ||||
| movss 1 * SIZE(X), %xmm2 | movss 1 * SIZE(X), %xmm2 | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm2, %xmm2 | |||||
| jp .L998 | |||||
| subq $2, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm1 | andps %xmm15, %xmm1 | ||||
| andps %xmm15, %xmm2 | andps %xmm15, %xmm2 | ||||
| @@ -885,6 +991,31 @@ | |||||
| movss 0 * SIZE(X), %xmm2 | movss 0 * SIZE(X), %xmm2 | ||||
| subq INCX, X | subq INCX, X | ||||
| movss 0 * SIZE(X), %xmm1 | movss 0 * SIZE(X), %xmm1 | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm2, %xmm2 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm3, %xmm3 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm4, %xmm4 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm5, %xmm5 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm6, %xmm6 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm7, %xmm7 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm8, %xmm8 | |||||
| jp .L998 | |||||
| subq $8, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm1 | andps %xmm15, %xmm1 | ||||
| andps %xmm15, %xmm2 | andps %xmm15, %xmm2 | ||||
| @@ -932,7 +1063,19 @@ | |||||
| addq INCX, X | addq INCX, X | ||||
| movss 0 * SIZE(X), %xmm4 | movss 0 * SIZE(X), %xmm4 | ||||
| addq INCX, X | addq INCX, X | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm2, %xmm2 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm3, %xmm3 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm4, %xmm4 | |||||
| jp .L998 | |||||
| subq $4, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm1 | andps %xmm15, %xmm1 | ||||
| andps %xmm15, %xmm2 | andps %xmm15, %xmm2 | ||||
| @@ -961,6 +1104,13 @@ | |||||
| addq INCX, X | addq INCX, X | ||||
| movss 0 * SIZE(X), %xmm2 | movss 0 * SIZE(X), %xmm2 | ||||
| addq INCX, X | addq INCX, X | ||||
| incq RET | |||||
| comiss %xmm1, %xmm1 | |||||
| jp .L998 | |||||
| incq RET | |||||
| comiss %xmm2, %xmm2 | |||||
| jp .L998 | |||||
| subq $2, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andps %xmm15, %xmm1 | andps %xmm15, %xmm1 | ||||
| andps %xmm15, %xmm2 | andps %xmm15, %xmm2 | ||||
| @@ -982,5 +1132,9 @@ | |||||
| ret | ret | ||||
| .L998: | |||||
| // incq RET | |||||
| jmp .L999 | |||||
| EPILOGUE | EPILOGUE | ||||
| @@ -79,6 +79,8 @@ | |||||
| movsd (X), %xmm0 | movsd (X), %xmm0 | ||||
| addq INCX, X | addq INCX, X | ||||
| decq M | decq M | ||||
| comisd %xmm0, %xmm0 | |||||
| jp .L987 | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm0 | andpd %xmm15, %xmm0 | ||||
| #endif | #endif | ||||
| @@ -269,6 +271,11 @@ | |||||
| je .L21 | je .L21 | ||||
| movsd 0 * SIZE(X), %xmm1 | movsd 0 * SIZE(X), %xmm1 | ||||
| incq RET | |||||
| comisd %xmm1, %xmm1 | |||||
| jp .L987 | |||||
| decq RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm1 | andpd %xmm15, %xmm1 | ||||
| #endif | #endif | ||||
| @@ -342,6 +349,32 @@ | |||||
| movsd 5 * SIZE(X), %xmm6 | movsd 5 * SIZE(X), %xmm6 | ||||
| movsd 6 * SIZE(X), %xmm7 | movsd 6 * SIZE(X), %xmm7 | ||||
| movsd 7 * SIZE(X), %xmm8 | movsd 7 * SIZE(X), %xmm8 | ||||
| incq RET | |||||
| comisd %xmm1, %xmm1 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm2, %xmm2 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm3, %xmm3 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm4, %xmm4 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm5, %xmm5 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm6, %xmm6 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm7, %xmm7 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm8, %xmm8 | |||||
| jp .L987 | |||||
| subq $8, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm1 | andpd %xmm15, %xmm1 | ||||
| andpd %xmm15, %xmm2 | andpd %xmm15, %xmm2 | ||||
| @@ -374,9 +407,9 @@ | |||||
| comisd %xmm0, %xmm6 | comisd %xmm0, %xmm6 | ||||
| je .L999 | je .L999 | ||||
| incq RET | incq RET | ||||
| comisd %xmm0, %xmm7 | |||||
| je .L999 | |||||
| incq RET | |||||
| comisd %xmm0, %xmm7 | |||||
| je .L999 | |||||
| incq RET | |||||
| jmp .L999 | jmp .L999 | ||||
| ALIGN_3 | ALIGN_3 | ||||
| @@ -388,6 +421,21 @@ | |||||
| movsd 1 * SIZE(X), %xmm2 | movsd 1 * SIZE(X), %xmm2 | ||||
| movsd 2 * SIZE(X), %xmm3 | movsd 2 * SIZE(X), %xmm3 | ||||
| movsd 3 * SIZE(X), %xmm4 | movsd 3 * SIZE(X), %xmm4 | ||||
| incq RET | |||||
| comisd %xmm1, %xmm1 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm2, %xmm2 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm3, %xmm3 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm4, %xmm4 | |||||
| jp .L987 | |||||
| subq $4, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm1 | andpd %xmm15, %xmm1 | ||||
| andpd %xmm15, %xmm2 | andpd %xmm15, %xmm2 | ||||
| @@ -415,6 +463,14 @@ | |||||
| movsd 0 * SIZE(X), %xmm1 | movsd 0 * SIZE(X), %xmm1 | ||||
| movsd 1 * SIZE(X), %xmm2 | movsd 1 * SIZE(X), %xmm2 | ||||
| incq RET | |||||
| comisd %xmm1, %xmm1 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm2, %xmm2 | |||||
| jp .L987 | |||||
| subq $2, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm1 | andpd %xmm15, %xmm1 | ||||
| andpd %xmm15, %xmm2 | andpd %xmm15, %xmm2 | ||||
| @@ -670,6 +726,32 @@ | |||||
| movsd 5 * SIZE(X), %xmm6 | movsd 5 * SIZE(X), %xmm6 | ||||
| movsd 6 * SIZE(X), %xmm7 | movsd 6 * SIZE(X), %xmm7 | ||||
| movsd 7 * SIZE(X), %xmm8 | movsd 7 * SIZE(X), %xmm8 | ||||
| incq RET | |||||
| comisd %xmm1, %xmm1 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm2, %xmm2 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm3, %xmm3 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm4, %xmm4 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm5, %xmm5 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm6, %xmm6 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm7, %xmm7 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm8, %xmm8 | |||||
| jp .L987 | |||||
| subq $8, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm1 | andpd %xmm15, %xmm1 | ||||
| andpd %xmm15, %xmm2 | andpd %xmm15, %xmm2 | ||||
| @@ -716,6 +798,20 @@ | |||||
| movsd 1 * SIZE(X), %xmm2 | movsd 1 * SIZE(X), %xmm2 | ||||
| movsd 2 * SIZE(X), %xmm3 | movsd 2 * SIZE(X), %xmm3 | ||||
| movsd 3 * SIZE(X), %xmm4 | movsd 3 * SIZE(X), %xmm4 | ||||
| incq RET | |||||
| comisd %xmm1, %xmm1 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm2, %xmm2 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm3, %xmm3 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm4, %xmm4 | |||||
| jp .L987 | |||||
| subq $4, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm1 | andpd %xmm15, %xmm1 | ||||
| andpd %xmm15, %xmm2 | andpd %xmm15, %xmm2 | ||||
| @@ -743,11 +839,21 @@ | |||||
| movsd 0 * SIZE(X), %xmm1 | movsd 0 * SIZE(X), %xmm1 | ||||
| movsd 1 * SIZE(X), %xmm2 | movsd 1 * SIZE(X), %xmm2 | ||||
| incq RET | |||||
| comisd %xmm1, %xmm1 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm2, %xmm2 | |||||
| jp .L987 | |||||
| subq $2, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm1 | andpd %xmm15, %xmm1 | ||||
| andpd %xmm15, %xmm2 | andpd %xmm15, %xmm2 | ||||
| #endif | #endif | ||||
| addq $2 * SIZE, X | addq $2 * SIZE, X | ||||
| comisd %xmm0, %xmm0 | |||||
| jp .L987 | |||||
| incq RET | incq RET | ||||
| comisd %xmm0, %xmm1 | comisd %xmm0, %xmm1 | ||||
| je .L999 | je .L999 | ||||
| @@ -962,6 +1068,7 @@ | |||||
| ALIGN_4 | ALIGN_4 | ||||
| .L92: | .L92: | ||||
| movsd 0 * SIZE(X), %xmm1 | movsd 0 * SIZE(X), %xmm1 | ||||
| addq INCX, X | addq INCX, X | ||||
| movhpd 0 * SIZE(X), %xmm1 | movhpd 0 * SIZE(X), %xmm1 | ||||
| @@ -1033,6 +1140,32 @@ | |||||
| movsd 0 * SIZE(X), %xmm2 | movsd 0 * SIZE(X), %xmm2 | ||||
| subq INCX, X | subq INCX, X | ||||
| movsd 0 * SIZE(X), %xmm1 | movsd 0 * SIZE(X), %xmm1 | ||||
| incq RET | |||||
| comisd %xmm1, %xmm1 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm2, %xmm2 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm3, %xmm3 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm4, %xmm4 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm5, %xmm5 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm6, %xmm6 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm7, %xmm7 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm8, %xmm8 | |||||
| jp .L987 | |||||
| subq $8, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm1 | andpd %xmm15, %xmm1 | ||||
| andpd %xmm15, %xmm2 | andpd %xmm15, %xmm2 | ||||
| @@ -1083,6 +1216,20 @@ | |||||
| addq INCX, X | addq INCX, X | ||||
| movsd 0 * SIZE(X), %xmm4 | movsd 0 * SIZE(X), %xmm4 | ||||
| addq INCX, X | addq INCX, X | ||||
| incq RET | |||||
| comisd %xmm1, %xmm1 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm2, %xmm2 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm3, %xmm3 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm4, %xmm4 | |||||
| jp .L987 | |||||
| subq $4, RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm1 | andpd %xmm15, %xmm1 | ||||
| andpd %xmm15, %xmm2 | andpd %xmm15, %xmm2 | ||||
| @@ -1111,6 +1258,14 @@ | |||||
| addq INCX, X | addq INCX, X | ||||
| movsd 0 * SIZE(X), %xmm2 | movsd 0 * SIZE(X), %xmm2 | ||||
| addq INCX, X | addq INCX, X | ||||
| incq RET | |||||
| comisd %xmm1, %xmm1 | |||||
| jp .L987 | |||||
| incq RET | |||||
| comisd %xmm2, %xmm2 | |||||
| jp .L987 | |||||
| decq RET | |||||
| decq RET | |||||
| #ifdef USE_ABS | #ifdef USE_ABS | ||||
| andpd %xmm15, %xmm1 | andpd %xmm15, %xmm1 | ||||
| andpd %xmm15, %xmm2 | andpd %xmm15, %xmm2 | ||||
| @@ -1122,7 +1277,6 @@ | |||||
| comisd %xmm0, %xmm2 | comisd %xmm0, %xmm2 | ||||
| je .L999 | je .L999 | ||||
| ALIGN_3 | ALIGN_3 | ||||
| .L98: | .L98: | ||||
| incq RET | incq RET | ||||
| ALIGN_3 | ALIGN_3 | ||||
| @@ -1132,5 +1286,8 @@ | |||||
| ret | ret | ||||
| EPILOGUE | |||||
| .L987: | |||||
| incq RET //count from xmm0 | |||||
| jmp .L999 | |||||
| EPILOGUE | |||||
| @@ -58,3 +58,29 @@ CTEST(amax, damax){ | |||||
| } | } | ||||
| #endif | #endif | ||||
#ifdef BUILD_SINGLE
/*
 * isamax on a vector that contains NaN entries.
 * Input is {1, NaN, NaN}; the expected result is 2, i.e. the position of
 * the second element (the first NaN) when counting from 1.
 */
CTEST(amax, isamax_nan){
  float x[]={1., 0./0., 0./0. };
  blasint N=3, inc=1;
  int expected=2;
  int actual=0;

  actual=BLASFUNC(isamax)(&N, x, &inc);
  ASSERT_EQUAL(expected, actual);
}
#endif
#ifdef BUILD_DOUBLE
/*
 * idamax on a vector whose first entry is NaN.
 * Input is {NaN, 1, 2, 3}; the expected result is 1, i.e. the position of
 * the leading NaN when counting from 1.
 */
CTEST(amax, idamax_nan){
  double x[]={ 0./0., 1.,2. ,3.};
  blasint N=4, inc=1;
  int expected=1;
  int actual=0;

  actual=BLASFUNC(idamax)(&N, x, &inc);
  ASSERT_EQUAL(expected, actual);
}
#endif