Browse Source
Merge pull request #2881 from mattip/fninit
Add fninit to reset FPU registers before assembler routines
tags/v0.3.11^2
Martin Kroeker
GitHub
5 years ago
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 107 additions and 0 deletions
-
kernel/x86_64/amax.S
-
kernel/x86_64/asum.S
-
kernel/x86_64/dot.S
-
kernel/x86_64/iamax.S
-
kernel/x86_64/izamax.S
-
kernel/x86_64/nrm2.S
-
kernel/x86_64/qconjg.S
-
kernel/x86_64/qdot.S
-
kernel/x86_64/qgemm_kernel_2x2.S
-
kernel/x86_64/qgemv_n.S
-
kernel/x86_64/qgemv_t.S
-
kernel/x86_64/qtrsm_kernel_LN_2x2.S
-
kernel/x86_64/qtrsm_kernel_LT_2x2.S
-
kernel/x86_64/qtrsm_kernel_RT_2x2.S
-
kernel/x86_64/sum.S
-
kernel/x86_64/xdot.S
-
kernel/x86_64/xgemm3m_kernel_2x2.S
-
kernel/x86_64/xgemm_kernel_1x1.S
-
kernel/x86_64/xgemv_n.S
-
kernel/x86_64/xgemv_t.S
-
kernel/x86_64/xtrsm_kernel_LT_1x1.S
-
kernel/x86_64/zamax.S
-
kernel/x86_64/zasum.S
-
kernel/x86_64/zdot.S
-
kernel/x86_64/znrm2.S
-
kernel/x86_64/zscal.S
-
kernel/x86_64/zsum.S
|
|
@@ -54,6 +54,10 @@ |
|
|
|
|
|
|
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
salq $BASE_SHIFT, INCX |
|
|
salq $BASE_SHIFT, INCX |
|
|
|
|
|
|
|
|
|
|
|
@@ -49,6 +49,10 @@ |
|
|
|
|
|
|
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
fldz |
|
|
fldz |
|
|
testq M, M |
|
|
testq M, M |
|
|
|
|
|
@@ -50,6 +50,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
salq $BASE_SHIFT, INCX |
|
|
salq $BASE_SHIFT, INCX |
|
|
salq $BASE_SHIFT, INCY |
|
|
salq $BASE_SHIFT, INCY |
|
|
|
|
|
|
|
|
|
|
|
@@ -60,6 +60,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
salq $BASE_SHIFT, INCX |
|
|
salq $BASE_SHIFT, INCX |
|
|
|
|
|
|
|
|
fldz |
|
|
fldz |
|
|
|
|
|
@@ -60,6 +60,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
salq $ZBASE_SHIFT, INCX |
|
|
salq $ZBASE_SHIFT, INCX |
|
|
|
|
|
|
|
|
fldz |
|
|
fldz |
|
|
|
|
|
@@ -50,6 +50,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
fldz |
|
|
fldz |
|
|
testq M, M |
|
|
testq M, M |
|
|
jle .L999 |
|
|
jle .L999 |
|
|
|
|
|
@@ -42,6 +42,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
fldz |
|
|
fldz |
|
|
FLD 1 * SIZE(ARG1) |
|
|
FLD 1 * SIZE(ARG1) |
|
|
fsubrp %st, %st(1) |
|
|
fsubrp %st, %st(1) |
|
|
|
|
|
@@ -58,6 +58,10 @@ |
|
|
|
|
|
|
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
pushl %edi |
|
|
pushl %edi |
|
|
pushl %esi |
|
|
pushl %esi |
|
|
pushl %ebx |
|
|
pushl %ebx |
|
|
|
|
|
@@ -74,6 +74,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -76,6 +76,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -75,6 +75,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -74,6 +74,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -74,6 +74,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -74,6 +74,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -50,6 +50,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
fldz |
|
|
fldz |
|
|
testq M, M |
|
|
testq M, M |
|
|
jle .L999 |
|
|
jle .L999 |
|
|
|
|
|
@@ -59,6 +59,11 @@ |
|
|
|
|
|
|
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define N %ebx |
|
|
#define N %ebx |
|
|
#define X %esi |
|
|
#define X %esi |
|
|
#define INCX %ecx |
|
|
#define INCX %ecx |
|
|
|
|
|
@@ -78,6 +78,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -97,6 +97,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -76,6 +76,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -75,6 +75,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -90,6 +90,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
subq $STACKSIZE, %rsp |
|
|
subq $STACKSIZE, %rsp |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbx, 0(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
movq %rbp, 8(%rsp) |
|
|
|
|
|
@@ -55,6 +55,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
salq $ZBASE_SHIFT, INCX |
|
|
salq $ZBASE_SHIFT, INCX |
|
|
|
|
|
|
|
|
fldz |
|
|
fldz |
|
|
|
|
|
@@ -50,6 +50,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
fldz |
|
|
fldz |
|
|
testq M, M |
|
|
testq M, M |
|
|
jle .L999 |
|
|
jle .L999 |
|
|
|
|
|
@@ -55,6 +55,8 @@ |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
|
|
|
movq 40(%rsp), INCY |
|
|
movq 40(%rsp), INCY |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
@@ -50,6 +50,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
fldz |
|
|
fldz |
|
|
testq M, M |
|
|
testq M, M |
|
|
jle .L999 |
|
|
jle .L999 |
|
|
|
|
|
@@ -50,6 +50,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
salq $ZBASE_SHIFT, INCX |
|
|
salq $ZBASE_SHIFT, INCX |
|
|
|
|
|
|
|
|
FLD 8(%rsp) |
|
|
FLD 8(%rsp) |
|
|
|
|
|
@@ -50,6 +50,10 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
PROFCODE |
|
|
PROFCODE |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WINDOWS_ABI |
|
|
|
|
|
emms |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
fldz |
|
|
fldz |
|
|
testq M, M |
|
|
testq M, M |
|
|
jle .L999 |
|
|
jle .L999 |
|
|
|