Browse Source

Add fninit to reset the x87 FPU state (control, status, and tag words) before the assembler routines

tags/v0.3.11^2
Matti Picus 5 years ago
parent
commit
a5b164946c
27 changed files with 50 additions and 1 deletion
  1. +2
    -0
      kernel/x86_64/amax.S
  2. +2
    -1
      kernel/x86_64/asum.S
  3. +1
    -0
      kernel/x86_64/dot.S
  4. +1
    -0
      kernel/x86_64/iamax.S
  5. +1
    -0
      kernel/x86_64/izamax.S
  6. +1
    -0
      kernel/x86_64/nrm2.S
  7. +1
    -0
      kernel/x86_64/qconjg.S
  8. +2
    -0
      kernel/x86_64/qdot.S
  9. +2
    -0
      kernel/x86_64/qgemm_kernel_2x2.S
  10. +2
    -0
      kernel/x86_64/qgemv_n.S
  11. +1
    -0
      kernel/x86_64/qgemv_t.S
  12. +2
    -0
      kernel/x86_64/qtrsm_kernel_LN_2x2.S
  13. +2
    -0
      kernel/x86_64/qtrsm_kernel_LT_2x2.S
  14. +3
    -0
      kernel/x86_64/qtrsm_kernel_RT_2x2.S
  15. +2
    -0
      kernel/x86_64/sum.S
  16. +3
    -0
      kernel/x86_64/xdot.S
  17. +2
    -0
      kernel/x86_64/xgemm3m_kernel_2x2.S
  18. +2
    -0
      kernel/x86_64/xgemm_kernel_1x1.S
  19. +2
    -0
      kernel/x86_64/xgemv_n.S
  20. +2
    -0
      kernel/x86_64/xgemv_t.S
  21. +2
    -0
      kernel/x86_64/xtrsm_kernel_LT_1x1.S
  22. +2
    -0
      kernel/x86_64/zamax.S
  23. +2
    -0
      kernel/x86_64/zasum.S
  24. +2
    -0
      kernel/x86_64/zdot.S
  25. +2
    -0
      kernel/x86_64/znrm2.S
  26. +2
    -0
      kernel/x86_64/zscal.S
  27. +2
    -0
      kernel/x86_64/zsum.S

+ 2
- 0
kernel/x86_64/amax.S View File

@@ -54,6 +54,8 @@


PROLOGUE PROLOGUE
PROFCODE PROFCODE
fninit


salq $BASE_SHIFT, INCX salq $BASE_SHIFT, INCX




+ 2
- 1
kernel/x86_64/asum.S View File

@@ -49,7 +49,8 @@


PROLOGUE PROLOGUE
PROFCODE PROFCODE

fninit
fldz fldz
testq M, M testq M, M
jle .L999 jle .L999


+ 1
- 0
kernel/x86_64/dot.S View File

@@ -49,6 +49,7 @@


PROLOGUE PROLOGUE
PROFCODE PROFCODE
fninit


salq $BASE_SHIFT, INCX salq $BASE_SHIFT, INCX
salq $BASE_SHIFT, INCY salq $BASE_SHIFT, INCY


+ 1
- 0
kernel/x86_64/iamax.S View File

@@ -59,6 +59,7 @@


PROLOGUE PROLOGUE
PROFCODE PROFCODE
fninit


salq $BASE_SHIFT, INCX salq $BASE_SHIFT, INCX




+ 1
- 0
kernel/x86_64/izamax.S View File

@@ -59,6 +59,7 @@


PROLOGUE PROLOGUE
PROFCODE PROFCODE
fninit


salq $ZBASE_SHIFT, INCX salq $ZBASE_SHIFT, INCX




+ 1
- 0
kernel/x86_64/nrm2.S View File

@@ -50,6 +50,7 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit
fldz fldz
testq M, M testq M, M
jle .L999 jle .L999


+ 1
- 0
kernel/x86_64/qconjg.S View File

@@ -41,6 +41,7 @@


PROLOGUE PROLOGUE
PROFCODE PROFCODE
fninit


fldz fldz
FLD 1 * SIZE(ARG1) FLD 1 * SIZE(ARG1)


+ 2
- 0
kernel/x86_64/qdot.S View File

@@ -58,6 +58,8 @@


PROLOGUE PROLOGUE


fninit

pushl %edi pushl %edi
pushl %esi pushl %esi
pushl %ebx pushl %ebx


+ 2
- 0
kernel/x86_64/qgemm_kernel_2x2.S View File

@@ -74,6 +74,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 2
- 0
kernel/x86_64/qgemv_n.S View File

@@ -76,6 +76,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 1
- 0
kernel/x86_64/qgemv_t.S View File

@@ -75,6 +75,7 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit
subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 2
- 0
kernel/x86_64/qtrsm_kernel_LN_2x2.S View File

@@ -74,6 +74,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 2
- 0
kernel/x86_64/qtrsm_kernel_LT_2x2.S View File

@@ -74,6 +74,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 3
- 0
kernel/x86_64/qtrsm_kernel_RT_2x2.S View File

@@ -74,6 +74,9 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit


subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 2
- 0
kernel/x86_64/sum.S View File

@@ -50,6 +50,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

fldz fldz
testq M, M testq M, M
jle .L999 jle .L999


+ 3
- 0
kernel/x86_64/xdot.S View File

@@ -59,6 +59,9 @@


PROFCODE PROFCODE


fninit


#define N %ebx #define N %ebx
#define X %esi #define X %esi
#define INCX %ecx #define INCX %ecx


+ 2
- 0
kernel/x86_64/xgemm3m_kernel_2x2.S View File

@@ -78,6 +78,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 2
- 0
kernel/x86_64/xgemm_kernel_1x1.S View File

@@ -97,6 +97,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 2
- 0
kernel/x86_64/xgemv_n.S View File

@@ -76,6 +76,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 2
- 0
kernel/x86_64/xgemv_t.S View File

@@ -75,6 +75,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 2
- 0
kernel/x86_64/xtrsm_kernel_LT_1x1.S View File

@@ -90,6 +90,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

subq $STACKSIZE, %rsp subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp) movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp) movq %rbp, 8(%rsp)


+ 2
- 0
kernel/x86_64/zamax.S View File

@@ -55,6 +55,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

salq $ZBASE_SHIFT, INCX salq $ZBASE_SHIFT, INCX


fldz fldz


+ 2
- 0
kernel/x86_64/zasum.S View File

@@ -50,6 +50,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

fldz fldz
testq M, M testq M, M
jle .L999 jle .L999


+ 2
- 0
kernel/x86_64/zdot.S View File

@@ -54,6 +54,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

#ifdef WINDOWS_ABI #ifdef WINDOWS_ABI
movq 40(%rsp), INCY movq 40(%rsp), INCY
#endif #endif


+ 2
- 0
kernel/x86_64/znrm2.S View File

@@ -50,6 +50,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

fldz fldz
testq M, M testq M, M
jle .L999 jle .L999


+ 2
- 0
kernel/x86_64/zscal.S View File

@@ -50,6 +50,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

salq $ZBASE_SHIFT, INCX salq $ZBASE_SHIFT, INCX


FLD 8(%rsp) FLD 8(%rsp)


+ 2
- 0
kernel/x86_64/zsum.S View File

@@ -50,6 +50,8 @@
PROLOGUE PROLOGUE
PROFCODE PROFCODE


fninit

fldz fldz
testq M, M testq M, M
jle .L999 jle .L999


Loading…
Cancel
Save