This website works better with JavaScript.
Home
Issues
Pull Requests
Milestones
AI流水线
Repositories
Datasets
Forum
实训
竞赛
大数据
AI开发
Register
Sign In
OSchip
/
OpenBLAS
Not watched
Unwatch
Watch all
Watch but not notify
1
Star
0
Fork
0
Code
Releases
66
Wiki
evaluate
Activity
Issues
0
Pull Requests
0
Datasets
Model
Cloudbrain
HPC
Browse Source
Merge pull request
#2881
from mattip/fninit
add fninit to reset fpu registers before assembler routines
tags/v0.3.11^2
Martin Kroeker
GitHub
5 years ago
parent
cb4274e3ad
403eb513a0
commit
0c84ffe05f
No known key found for this signature in database
GPG Key ID:
4AEE18F83AFDEB23
27 changed files
with
107 additions
and
0 deletions
Split View
Diff Options
Show Stats
Download Patch File
Download Diff File
+4
-0
kernel/x86_64/amax.S
+4
-0
kernel/x86_64/asum.S
+4
-0
kernel/x86_64/dot.S
+4
-0
kernel/x86_64/iamax.S
+4
-0
kernel/x86_64/izamax.S
+4
-0
kernel/x86_64/nrm2.S
+4
-0
kernel/x86_64/qconjg.S
+4
-0
kernel/x86_64/qdot.S
+4
-0
kernel/x86_64/qgemm_kernel_2x2.S
+4
-0
kernel/x86_64/qgemv_n.S
+4
-0
kernel/x86_64/qgemv_t.S
+4
-0
kernel/x86_64/qtrsm_kernel_LN_2x2.S
+4
-0
kernel/x86_64/qtrsm_kernel_LT_2x2.S
+4
-0
kernel/x86_64/qtrsm_kernel_RT_2x2.S
+4
-0
kernel/x86_64/sum.S
+5
-0
kernel/x86_64/xdot.S
+4
-0
kernel/x86_64/xgemm3m_kernel_2x2.S
+4
-0
kernel/x86_64/xgemm_kernel_1x1.S
+4
-0
kernel/x86_64/xgemv_n.S
+4
-0
kernel/x86_64/xgemv_t.S
+4
-0
kernel/x86_64/xtrsm_kernel_LT_1x1.S
+4
-0
kernel/x86_64/zamax.S
+4
-0
kernel/x86_64/zasum.S
+2
-0
kernel/x86_64/zdot.S
+4
-0
kernel/x86_64/znrm2.S
+4
-0
kernel/x86_64/zscal.S
+4
-0
kernel/x86_64/zsum.S
+ 4
- 0
kernel/x86_64/amax.S
View File
@@ -54,6 +54,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
salq $BASE_SHIFT, INCX
+ 4
- 0
kernel/x86_64/asum.S
View File
@@ -49,6 +49,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
fldz
testq M, M
+ 4
- 0
kernel/x86_64/dot.S
View File
@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
salq $BASE_SHIFT, INCX
salq $BASE_SHIFT, INCY
+ 4
- 0
kernel/x86_64/iamax.S
View File
@@ -60,6 +60,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
salq $BASE_SHIFT, INCX
fldz
+ 4
- 0
kernel/x86_64/izamax.S
View File
@@ -60,6 +60,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
salq $ZBASE_SHIFT, INCX
fldz
+ 4
- 0
kernel/x86_64/nrm2.S
View File
@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
fldz
testq M, M
jle .L999
+ 4
- 0
kernel/x86_64/qconjg.S
View File
@@ -42,6 +42,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
fldz
FLD 1 * SIZE(ARG1)
fsubrp %st, %st(1)
+ 4
- 0
kernel/x86_64/qdot.S
View File
@@ -58,6 +58,10 @@
PROLOGUE
#ifdef WINDOWS_ABI
emms
#endif
pushl %edi
pushl %esi
pushl %ebx
+ 4
- 0
kernel/x86_64/qgemm_kernel_2x2.S
View File
@@ -74,6 +74,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/qgemv_n.S
View File
@@ -76,6 +76,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/qgemv_t.S
View File
@@ -75,6 +75,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/qtrsm_kernel_LN_2x2.S
View File
@@ -74,6 +74,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/qtrsm_kernel_LT_2x2.S
View File
@@ -74,6 +74,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/qtrsm_kernel_RT_2x2.S
View File
@@ -74,6 +74,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/sum.S
View File
@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
fldz
testq M, M
jle .L999
+ 5
- 0
kernel/x86_64/xdot.S
View File
@@ -59,6 +59,11 @@
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
#define N %ebx
#define X %esi
#define INCX %ecx
+ 4
- 0
kernel/x86_64/xgemm3m_kernel_2x2.S
View File
@@ -78,6 +78,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/xgemm_kernel_1x1.S
View File
@@ -97,6 +97,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/xgemv_n.S
View File
@@ -76,6 +76,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/xgemv_t.S
View File
@@ -75,6 +75,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/xtrsm_kernel_LT_1x1.S
View File
@@ -90,6 +90,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
subq $STACKSIZE, %rsp
movq %rbx, 0(%rsp)
movq %rbp, 8(%rsp)
+ 4
- 0
kernel/x86_64/zamax.S
View File
@@ -55,6 +55,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
salq $ZBASE_SHIFT, INCX
fldz
+ 4
- 0
kernel/x86_64/zasum.S
View File
@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
fldz
testq M, M
jle .L999
+ 2
- 0
kernel/x86_64/zdot.S
View File
@@ -55,6 +55,8 @@
PROFCODE
#ifdef WINDOWS_ABI
emms
movq 40(%rsp), INCY
#endif
+ 4
- 0
kernel/x86_64/znrm2.S
View File
@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
fldz
testq M, M
jle .L999
+ 4
- 0
kernel/x86_64/zscal.S
View File
@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
salq $ZBASE_SHIFT, INCX
FLD 8(%rsp)
+ 4
- 0
kernel/x86_64/zsum.S
View File
@@ -50,6 +50,10 @@
PROLOGUE
PROFCODE
#ifdef WINDOWS_ABI
emms
#endif
fldz
testq M, M
jle .L999
Write
Preview
Loading…
Cancel
Save