Browse Source

Merge pull request #2881 from mattip/fninit

Add fninit to reset FPU registers before assembler routines (note: the hunks shown below insert `emms` inside `#ifdef WINDOWS_ABI` rather than `fninit`)
tags/v0.3.11^2
Martin Kroeker GitHub 5 years ago
parent
commit
0c84ffe05f
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 107 additions and 0 deletions
  1. +4
    -0
      kernel/x86_64/amax.S
  2. +4
    -0
      kernel/x86_64/asum.S
  3. +4
    -0
      kernel/x86_64/dot.S
  4. +4
    -0
      kernel/x86_64/iamax.S
  5. +4
    -0
      kernel/x86_64/izamax.S
  6. +4
    -0
      kernel/x86_64/nrm2.S
  7. +4
    -0
      kernel/x86_64/qconjg.S
  8. +4
    -0
      kernel/x86_64/qdot.S
  9. +4
    -0
      kernel/x86_64/qgemm_kernel_2x2.S
  10. +4
    -0
      kernel/x86_64/qgemv_n.S
  11. +4
    -0
      kernel/x86_64/qgemv_t.S
  12. +4
    -0
      kernel/x86_64/qtrsm_kernel_LN_2x2.S
  13. +4
    -0
      kernel/x86_64/qtrsm_kernel_LT_2x2.S
  14. +4
    -0
      kernel/x86_64/qtrsm_kernel_RT_2x2.S
  15. +4
    -0
      kernel/x86_64/sum.S
  16. +5
    -0
      kernel/x86_64/xdot.S
  17. +4
    -0
      kernel/x86_64/xgemm3m_kernel_2x2.S
  18. +4
    -0
      kernel/x86_64/xgemm_kernel_1x1.S
  19. +4
    -0
      kernel/x86_64/xgemv_n.S
  20. +4
    -0
      kernel/x86_64/xgemv_t.S
  21. +4
    -0
      kernel/x86_64/xtrsm_kernel_LT_1x1.S
  22. +4
    -0
      kernel/x86_64/zamax.S
  23. +4
    -0
      kernel/x86_64/zasum.S
  24. +2
    -0
      kernel/x86_64/zdot.S
  25. +4
    -0
      kernel/x86_64/znrm2.S
  26. +4
    -0
      kernel/x86_64/zscal.S
  27. +4
    -0
      kernel/x86_64/zsum.S

+ 4
- 0
kernel/x86_64/amax.S View File

@@ -54,6 +54,10 @@

PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif

salq $BASE_SHIFT, INCX



+ 4
- 0
kernel/x86_64/asum.S View File

@@ -49,6 +49,10 @@

PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif

fldz
testq M, M


+ 4
- 0
kernel/x86_64/dot.S View File

@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

salq $BASE_SHIFT, INCX
salq $BASE_SHIFT, INCY



+ 4
- 0
kernel/x86_64/iamax.S View File

@@ -60,6 +60,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

salq $BASE_SHIFT, INCX

fldz


+ 4
- 0
kernel/x86_64/izamax.S View File

@@ -60,6 +60,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

salq $ZBASE_SHIFT, INCX

fldz


+ 4
- 0
kernel/x86_64/nrm2.S View File

@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

fldz
testq M, M
jle .L999


+ 4
- 0
kernel/x86_64/qconjg.S View File

@@ -42,6 +42,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

fldz
FLD 1 * SIZE(ARG1)
fsubrp %st, %st(1)


+ 4
- 0
kernel/x86_64/qdot.S View File

@@ -58,6 +58,10 @@

PROLOGUE

#ifdef WINDOWS_ABI
emms
#endif

pushl %edi
pushl %esi
pushl %ebx


+ 4
- 0
kernel/x86_64/qgemm_kernel_2x2.S View File

@@ -74,6 +74,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/qgemv_n.S View File

@@ -76,6 +76,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/qgemv_t.S View File

@@ -75,6 +75,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/qtrsm_kernel_LN_2x2.S View File

@@ -74,6 +74,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/qtrsm_kernel_LT_2x2.S View File

@@ -74,6 +74,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/qtrsm_kernel_RT_2x2.S View File

@@ -74,6 +74,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/sum.S View File

@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

fldz
testq M, M
jle .L999


+ 5
- 0
kernel/x86_64/xdot.S View File

@@ -59,6 +59,11 @@

PROFCODE

#ifdef WINDOWS_ABI
emms
#endif


#define N %ebx
#define X %esi
#define INCX %ecx


+ 4
- 0
kernel/x86_64/xgemm3m_kernel_2x2.S View File

@@ -78,6 +78,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/xgemm_kernel_1x1.S View File

@@ -97,6 +97,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/xgemv_n.S View File

@@ -76,6 +76,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/xgemv_t.S View File

@@ -75,6 +75,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/xtrsm_kernel_LT_1x1.S View File

@@ -90,6 +90,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)


+ 4
- 0
kernel/x86_64/zamax.S View File

@@ -55,6 +55,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

salq $ZBASE_SHIFT, INCX

fldz


+ 4
- 0
kernel/x86_64/zasum.S View File

@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

fldz
testq M, M
jle .L999


+ 2
- 0
kernel/x86_64/zdot.S View File

@@ -55,6 +55,8 @@
PROFCODE

#ifdef WINDOWS_ABI
emms

movq 40(%rsp), INCY
#endif



+ 4
- 0
kernel/x86_64/znrm2.S View File

@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

fldz
testq M, M
jle .L999


+ 4
- 0
kernel/x86_64/zscal.S View File

@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

salq $ZBASE_SHIFT, INCX

FLD 8(%rsp)


+ 4
- 0
kernel/x86_64/zsum.S View File

@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE

#ifdef WINDOWS_ABI
emms
#endif

fldz
testq M, M
jle .L999


Loading…
Cancel
Save