Browse Source

Mark iamax_sse.S as unsuitable for MIN

due to issue #2116
pull/2125/head
Martin Kroeker GitHub 7 years ago
parent
commit
327e5d8de5
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 56 additions and 50 deletions
  1. +56
    -50
      kernel/x86_64/iamax_sse.S

+ 56
- 50
kernel/x86_64/iamax_sse.S View File

@@ -36,6 +36,10 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/


/* This kernel was found to give wrong results when used for ISMIN/ISAMIN
with an increment != 1, although it appears to be correct for the
corresponding MAX operations. See issue #2116. */

#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"


@@ -48,9 +52,11 @@
#define XX %r10 #define XX %r10
#define MM %r11 #define MM %r11


#define MAXPS maxps
#define MAXSS maxss
#ifdef USE_MIN #ifdef USE_MIN
#define maxps minps
#define maxss minss
#define MAXPS minps
#define MAXSS minss
#endif #endif


#include "l1param.h" #include "l1param.h"
@@ -103,7 +109,7 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxss %xmm4, %xmm0
MAXSS %xmm4, %xmm0
decq M decq M
addq $SIZE, X addq $SIZE, X
ALIGN_3 ALIGN_3
@@ -117,7 +123,7 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxps %xmm4, %xmm1
MAXPS %xmm4, %xmm1
subq $2, M subq $2, M
addq $2 * SIZE, X addq $2 * SIZE, X
ALIGN_3 ALIGN_3
@@ -137,25 +143,25 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxps %xmm4, %xmm0
MAXPS %xmm4, %xmm0


movaps 4 * SIZE(X), %xmm5 movaps 4 * SIZE(X), %xmm5
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm5 andps %xmm15, %xmm5
#endif #endif
maxps %xmm5, %xmm1
MAXPS %xmm5, %xmm1


movaps 8 * SIZE(X), %xmm6 movaps 8 * SIZE(X), %xmm6
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm6 andps %xmm15, %xmm6
#endif #endif
maxps %xmm6, %xmm2
MAXPS %xmm6, %xmm2


movaps 12 * SIZE(X), %xmm7 movaps 12 * SIZE(X), %xmm7
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm7 andps %xmm15, %xmm7
#endif #endif
maxps %xmm7, %xmm3
MAXPS %xmm7, %xmm3


addq $16 * SIZE, X addq $16 * SIZE, X
decq I decq I
@@ -173,13 +179,13 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxps %xmm4, %xmm0
MAXPS %xmm4, %xmm0


movaps 4 * SIZE(X), %xmm5 movaps 4 * SIZE(X), %xmm5
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm5 andps %xmm15, %xmm5
#endif #endif
maxps %xmm5, %xmm1
MAXPS %xmm5, %xmm1
addq $8 * SIZE, X addq $8 * SIZE, X
ALIGN_3 ALIGN_3


@@ -191,7 +197,7 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm6 andps %xmm15, %xmm6
#endif #endif
maxps %xmm6, %xmm2
MAXPS %xmm6, %xmm2
addq $4 * SIZE, X addq $4 * SIZE, X
ALIGN_3 ALIGN_3


@@ -204,7 +210,7 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm7 andps %xmm15, %xmm7
#endif #endif
maxps %xmm7, %xmm3
MAXPS %xmm7, %xmm3
addq $2 * SIZE, X addq $2 * SIZE, X


.L18: .L18:
@@ -215,22 +221,22 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxss %xmm4, %xmm0
MAXSS %xmm4, %xmm0
ALIGN_3 ALIGN_3


.L20: .L20:
movq XX, X movq XX, X
movq MM, M movq MM, M


maxps %xmm1, %xmm0
maxps %xmm3, %xmm2
maxps %xmm2, %xmm0
MAXPS %xmm1, %xmm0
MAXPS %xmm3, %xmm2
MAXPS %xmm2, %xmm0
movaps %xmm0, %xmm1 movaps %xmm0, %xmm1
movhlps %xmm0, %xmm0 movhlps %xmm0, %xmm0
maxps %xmm1, %xmm0
MAXPS %xmm1, %xmm0
movaps %xmm0, %xmm1 movaps %xmm0, %xmm1
shufps $1, %xmm0, %xmm0 shufps $1, %xmm0, %xmm0
maxss %xmm1, %xmm0
MAXSS %xmm1, %xmm0
shufps $0, %xmm0, %xmm0 shufps $0, %xmm0, %xmm0


testq $4, X testq $4, X
@@ -427,28 +433,28 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxps %xmm4, %xmm0
MAXPS %xmm4, %xmm0


movsd 4 * SIZE(X), %xmm5 movsd 4 * SIZE(X), %xmm5
movhps 6 * SIZE(X), %xmm5 movhps 6 * SIZE(X), %xmm5
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm5 andps %xmm15, %xmm5
#endif #endif
maxps %xmm5, %xmm1
MAXPS %xmm5, %xmm1


movsd 8 * SIZE(X), %xmm6 movsd 8 * SIZE(X), %xmm6
movhps 10 * SIZE(X), %xmm6 movhps 10 * SIZE(X), %xmm6
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm6 andps %xmm15, %xmm6
#endif #endif
maxps %xmm6, %xmm2
MAXPS %xmm6, %xmm2


movsd 12 * SIZE(X), %xmm7 movsd 12 * SIZE(X), %xmm7
movhps 14 * SIZE(X), %xmm7 movhps 14 * SIZE(X), %xmm7
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm7 andps %xmm15, %xmm7
#endif #endif
maxps %xmm7, %xmm3
MAXPS %xmm7, %xmm3


addq $16 * SIZE, X addq $16 * SIZE, X
decq I decq I
@@ -467,14 +473,14 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxps %xmm4, %xmm0
MAXPS %xmm4, %xmm0


movsd 4 * SIZE(X), %xmm5 movsd 4 * SIZE(X), %xmm5
movhps 6 * SIZE(X), %xmm5 movhps 6 * SIZE(X), %xmm5
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm5 andps %xmm15, %xmm5
#endif #endif
maxps %xmm5, %xmm1
MAXPS %xmm5, %xmm1


addq $8 * SIZE, X addq $8 * SIZE, X
ALIGN_3 ALIGN_3
@@ -488,7 +494,7 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm6 andps %xmm15, %xmm6
#endif #endif
maxps %xmm6, %xmm2
MAXPS %xmm6, %xmm2
addq $4 * SIZE, X addq $4 * SIZE, X
ALIGN_3 ALIGN_3


@@ -501,7 +507,7 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm7 andps %xmm15, %xmm7
#endif #endif
maxps %xmm7, %xmm3
MAXPS %xmm7, %xmm3
addq $2 * SIZE, X addq $2 * SIZE, X


.L38: .L38:
@@ -512,7 +518,7 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxss %xmm4, %xmm0
MAXSS %xmm4, %xmm0
jmp .L40 jmp .L40
ALIGN_4 ALIGN_4


@@ -520,15 +526,15 @@
movq XX, X movq XX, X
movq MM, M movq MM, M


maxps %xmm1, %xmm0
maxps %xmm3, %xmm2
maxps %xmm2, %xmm0
MAXPS %xmm1, %xmm0
MAXPS %xmm3, %xmm2
MAXPS %xmm2, %xmm0
movaps %xmm0, %xmm1 movaps %xmm0, %xmm1
movhlps %xmm0, %xmm0 movhlps %xmm0, %xmm0
maxps %xmm1, %xmm0
MAXPS %xmm1, %xmm0
movaps %xmm0, %xmm1 movaps %xmm0, %xmm1
shufps $1, %xmm0, %xmm0 shufps $1, %xmm0, %xmm0
maxss %xmm1, %xmm0
MAXSS %xmm1, %xmm0
shufps $0, %xmm0, %xmm0 shufps $0, %xmm0, %xmm0


movq M, I movq M, I
@@ -687,56 +693,56 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxss %xmm4, %xmm0
MAXSS %xmm4, %xmm0


movss 0 * SIZE(X), %xmm5 movss 0 * SIZE(X), %xmm5
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm5 andps %xmm15, %xmm5
#endif #endif
maxss %xmm5, %xmm1
MAXSS %xmm5, %xmm1


movss 0 * SIZE(X), %xmm6 movss 0 * SIZE(X), %xmm6
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm6 andps %xmm15, %xmm6
#endif #endif
maxss %xmm6, %xmm2
MAXSS %xmm6, %xmm2


movss 0 * SIZE(X), %xmm7 movss 0 * SIZE(X), %xmm7
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm7 andps %xmm15, %xmm7
#endif #endif
maxss %xmm7, %xmm3
MAXSS %xmm7, %xmm3


movss 0 * SIZE(X), %xmm4 movss 0 * SIZE(X), %xmm4
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxss %xmm4, %xmm0
MAXSS %xmm4, %xmm0


movss 0 * SIZE(X), %xmm5 movss 0 * SIZE(X), %xmm5
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm5 andps %xmm15, %xmm5
#endif #endif
maxss %xmm5, %xmm1
MAXSS %xmm5, %xmm1


movss 0 * SIZE(X), %xmm6 movss 0 * SIZE(X), %xmm6
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm6 andps %xmm15, %xmm6
#endif #endif
maxss %xmm6, %xmm2
MAXSS %xmm6, %xmm2


movss 0 * SIZE(X), %xmm7 movss 0 * SIZE(X), %xmm7
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm7 andps %xmm15, %xmm7
#endif #endif
maxss %xmm7, %xmm3
MAXSS %xmm7, %xmm3


decq I decq I
jg .L81 jg .L81
@@ -754,28 +760,28 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxss %xmm4, %xmm0
MAXSS %xmm4, %xmm0


movss 0 * SIZE(X), %xmm5 movss 0 * SIZE(X), %xmm5
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm5 andps %xmm15, %xmm5
#endif #endif
maxss %xmm5, %xmm1
MAXSS %xmm5, %xmm1


movss 0 * SIZE(X), %xmm6 movss 0 * SIZE(X), %xmm6
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm6 andps %xmm15, %xmm6
#endif #endif
maxss %xmm6, %xmm2
MAXSS %xmm6, %xmm2


movss 0 * SIZE(X), %xmm7 movss 0 * SIZE(X), %xmm7
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm7 andps %xmm15, %xmm7
#endif #endif
maxss %xmm7, %xmm3
MAXSS %xmm7, %xmm3
ALIGN_3 ALIGN_3


.L86: .L86:
@@ -787,14 +793,14 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm4 andps %xmm15, %xmm4
#endif #endif
maxss %xmm4, %xmm0
MAXSS %xmm4, %xmm0


movss 0 * SIZE(X), %xmm5 movss 0 * SIZE(X), %xmm5
addq INCX, X addq INCX, X
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm5 andps %xmm15, %xmm5
#endif #endif
maxss %xmm5, %xmm1
MAXSS %xmm5, %xmm1
ALIGN_3 ALIGN_3


.L87: .L87:
@@ -806,16 +812,16 @@
#ifdef USE_ABS #ifdef USE_ABS
andps %xmm15, %xmm6 andps %xmm15, %xmm6
#endif #endif
maxss %xmm6, %xmm2
MAXSS %xmm6, %xmm2
ALIGN_4 ALIGN_4


.L90: .L90:
movq XX, X movq XX, X
movq MM, M movq MM, M


maxss %xmm1, %xmm0
maxss %xmm3, %xmm2
maxss %xmm2, %xmm0
MAXSS %xmm1, %xmm0
MAXSS %xmm3, %xmm2
MAXSS %xmm2, %xmm0
shufps $0, %xmm0, %xmm0 shufps $0, %xmm0, %xmm0


movq M, I movq M, I


Loading…
Cancel
Save