Browse Source

Fixed an internal buffer overflow bug in (s/d/c/z)gemv on x86_64.

tags/v0.2.7
wangqian 13 years ago
parent
commit
23965f164c
7 changed files with 297 additions and 31 deletions
  1. +53
    -4
      kernel/x86_64/cgemv_n.S
  2. +46
    -2
      kernel/x86_64/cgemv_t.S
  3. +42
    -6
      kernel/x86_64/dgemv_n.S
  4. +48
    -8
      kernel/x86_64/sgemv_n.S
  5. +5
    -5
      kernel/x86_64/sgemv_t.S
  6. +55
    -4
      kernel/x86_64/zgemv_n.S
  7. +48
    -2
      kernel/x86_64/zgemv_t.S

+ 53
- 4
kernel/x86_64/cgemv_n.S View File

@@ -47,14 +47,22 @@

#ifndef WINDOWS_ABI

/* Frame layout for the non-Windows ABI.  Two STACKSIZE values appear back
   to back in this view (64, then 128).  The spill slots below reach offset
   112, so they only fit inside the 128-byte frame. */
#define STACKSIZE 64
#define STACKSIZE 128
/* Arguments addressed relative to STACKSIZE(%rsp). */
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)

/* Spill slots: the entry code stores M, N, A, X, LDA and the two alpha
   components here and reloads them at the top of every row chunk. */
#define MMM 64(%rsp)
#define NN 72(%rsp)
#define AA 80(%rsp)
#define XX 88(%rsp)
#define LDAX 96(%rsp)
#define ALPHAR 104(%rsp)
#define ALPHAI 112(%rsp)

/* Register aliases for the leading arguments. */
#define M %rdi
#define N %rsi
#define A %rcx
@@ -66,7 +74,7 @@

#else

/* Frame layout for WINDOWS_ABI.  Two STACKSIZE values appear back to back
   in this view (256, then 288).  The spill slots below reach offset 280,
   so they only fit inside the 288-byte frame. */
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
@@ -78,6 +86,14 @@
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
/* NOTE(review): ALPHA sits at 224 and MMM at 232, leaving 8 bytes for ALPHA
   here versus 16 in the non-Windows layout (48 -> 64).  Confirm ALPHA is
   never stored wider than 8 bytes. */
#define ALPHA 224 (%rsp)

/* Spill slots for the arguments that are reloaded on every row chunk. */
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define XX 256(%rsp)
#define LDAX 264(%rsp)
#define ALPHAR 272(%rsp)
#define ALPHAI 280(%rsp)

/* Register aliases for the leading arguments. */
#define M %rcx
#define N %rdx
#define A %r8
@@ -142,9 +158,37 @@
movaps %xmm3, %xmm0
movss OLD_ALPHA_I, %xmm1
#endif
/* Spill the incoming arguments so that every row chunk can start from the
   original values.  Y is loaded once here, before the loop. */
movq A, AA
movq N, NN
movq M, MMM
movq LDA, LDAX
movq X, XX
movq OLD_Y, Y
movss %xmm0,ALPHAR
movss %xmm1,ALPHAI

/* Top of the row-chunk loop: process at most 1 << 20 rows per pass. */
.L0t:
xorq I,I
addq $1,I
salq $20,I
/* I = 1 << 20.  MMM -= I sets the flags tested by the jge below; the mov
   and movss instructions in between do not modify flags. */
subq I,MMM
movq I,M
movss ALPHAR,%xmm0
movss ALPHAI,%xmm1
jge .L00t

/* Fewer than I rows were left: M = MMM + I is the remainder.  Once the
   remainder has been handled MMM stays negative, so the next pass computes
   M <= 0 and leaves through .L999x. */
movq MMM,M
addq I,M
jle .L999x

/* Reload the per-chunk arguments from their spill slots. */
.L00t:
movq AA, A
movq NN, N
movq LDAX, LDA
movq XX, X

/* NOTE(review): this view shows both an active and a commented-out reload
   of Y.  If the active one is kept, Y restarts from OLD_Y on every chunk;
   confirm which is intended. */
movq OLD_INCX, INCX
movq OLD_Y, Y
# movq OLD_Y, Y
movq OLD_INCY, INCY
movq OLD_BUFFER, BUFFER

@@ -4274,6 +4318,11 @@
ALIGN_3

/* End of one row chunk: advance the saved A pointer by M << ZBASE_SHIFT
   bytes and go back for the next chunk.  Presumably ZBASE_SHIFT is log2 of
   the complex element size; it is defined outside this view. */
.L999:
movq M, I
salq $ZBASE_SHIFT,I
addq I,AA
jmp .L0t
/* All rows done: restore the registers saved in the frame. */
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12


+ 46
- 2
kernel/x86_64/cgemv_t.S View File

@@ -47,13 +47,19 @@

#ifndef WINDOWS_ABI

/* Frame layout for the non-Windows ABI.  Two STACKSIZE values appear back
   to back in this view (64, then 128).  The spill slots below reach offset
   104, so they only fit inside the 128-byte frame. */
#define STACKSIZE 64
#define STACKSIZE 128
/* Arguments addressed relative to STACKSIZE(%rsp). */
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)
/* Spill slots written once at entry and reloaded for every row chunk. */
#define MMM 64(%rsp)
#define NN 72(%rsp)
#define AA 80(%rsp)
#define LDAX 88(%rsp)
#define ALPHAR 96(%rsp)
#define ALPHAI 104(%rsp)
/* Register aliases for the leading arguments. */
#define M %rdi
#define N %rsi
@@ -66,7 +72,7 @@

#else

/* Frame layout for WINDOWS_ABI.  Two STACKSIZE values appear back to back
   in this view (256, then 288).  The spill slots below reach offset 272,
   so they only fit inside the 288-byte frame. */
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
@@ -78,6 +84,13 @@
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
/* NOTE(review): ALPHA sits at 224 and MMM at 232, leaving 8 bytes for ALPHA
   here versus 16 in the non-Windows layout (48 -> 64).  Confirm ALPHA is
   never stored wider than 8 bytes. */
#define ALPHA 224 (%rsp)

/* Spill slots written once at entry and reloaded for every row chunk. */
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)
#define ALPHAR 264(%rsp)
#define ALPHAI 272(%rsp)

/* Register aliases for the leading arguments. */
#define M %rcx
#define N %rdx
#define A %r8
@@ -144,6 +157,32 @@
movss OLD_ALPHA_I, %xmm1
#endif

/* Spill the incoming arguments so that every row chunk can start from the
   original values. */
movq A, AA
movq N, NN
movq M, MMM
movq LDA, LDAX
movss %xmm0,ALPHAR
movss %xmm1,ALPHAI

/* Top of the row-chunk loop: process at most 1 << 20 rows per pass. */
.L0t:
xorq I,I
addq $1,I
salq $20,I
/* I = 1 << 20.  MMM -= I sets the flags tested by the jge below; the mov
   and movss instructions in between do not modify flags. */
subq I,MMM
movq I,M
movss ALPHAR,%xmm0
movss ALPHAI,%xmm1
jge .L00t

/* Fewer than I rows were left: M = MMM + I is the remainder.  Once the
   remainder has been handled MMM stays negative, so the next pass computes
   M <= 0 and leaves through .L999x. */
movq MMM,M
addq I,M
jle .L999x

/* Reload the per-chunk arguments from their spill slots. */
.L00t:
movq AA, A
movq NN, N
movq LDAX, LDA

/* Here Y is reloaded from OLD_Y at the start of every chunk. */
movq OLD_INCX, INCX
movq OLD_Y, Y
movq OLD_INCY, INCY
@@ -4350,6 +4389,11 @@
ALIGN_3

/* End of one row chunk: advance the saved A pointer by M << ZBASE_SHIFT
   bytes and go back for the next chunk.  Presumably ZBASE_SHIFT is log2 of
   the complex element size; it is defined outside this view. */
.L999:
movq M, I
salq $ZBASE_SHIFT,I
addq I,AA
jmp .L0t
/* All rows done: restore the registers saved in the frame. */
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12


+ 42
- 6
kernel/x86_64/dgemv_n.S View File

@@ -47,7 +47,7 @@

#ifndef WINDOWS_ABI

/* Frame layout for the non-Windows ABI.  Two STACKSIZE values appear back
   to back in this view (64, then 128).  The spill slots below reach offset
   88, so they only fit inside the 128-byte frame. */
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_M %rdi
#define OLD_N %rsi
@@ -59,6 +59,11 @@
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)

/* Spill slots written once at entry and reloaded for every row chunk.
   NOTE(review): in the visible part of the file these five slots are only
   defined in this non-Windows branch, while the chunk loop uses them
   outside any #ifdef.  Confirm the WINDOWS_ABI branch defines them too and
   that its STACKSIZE leaves room for them. */
#define MMM 56(%rsp)
#define NN 64(%rsp)
#define AA 72(%rsp)
#define LDAX 80(%rsp)
#define XX 88(%rsp)
#else

#define STACKSIZE 256
@@ -137,17 +142,42 @@
movq OLD_LDA, LDA
#endif

movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER

/* Store alpha from whichever register holds it under the active ABI. */
#ifndef WINDOWS_ABI
movsd %xmm0, ALPHA
#else
movsd %xmm3, ALPHA
#endif

/* Load Y once and spill the arguments that every row chunk reloads. */
movq STACK_Y, Y
movq A,AA
movq N,NN
movq M,MMM
movq LDA,LDAX
movq X,XX

/* Top of the row-chunk loop: process at most 1 << 21 rows per pass. */
.L0t:
xorq I,I
addq $1,I
salq $21,I
/* I = 1 << 21.  MMM -= I sets the flags tested by the jge below; the mov
   in between does not modify flags. */
subq I,MMM
movq I,M
jge .L00t

/* Fewer than I rows were left: M = MMM + I is the remainder.  Once the
   remainder has been handled MMM stays negative, so the next pass computes
   M <= 0 and leaves through .L999x. */
movq MMM,M
addq I,M
jle .L999x
/* Reload the per-chunk arguments from their spill slots. */
.L00t:
movq XX,X
movq AA,A
movq NN,N
movq LDAX,LDA

/* INCX is reloaded on every pass because the leaq below rescales it in
   place; INCY and BUFFER are reloaded alongside it. */
movq STACK_INCX, INCX
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER


leaq -1(INCY), %rax

leaq (,INCX, SIZE), INCX
@@ -2815,6 +2845,12 @@
ALIGN_3

/* End of one row chunk: advance the saved A pointer by M * SIZE bytes and
   go back for the next chunk. */
.L999:
leaq (, M, SIZE), %rax
addq %rax,AA
jmp .L0t
ALIGN_4

/* All rows done: restore the registers saved in the frame. */
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12


+ 48
- 8
kernel/x86_64/sgemv_n.S View File

@@ -47,7 +47,7 @@

#ifndef WINDOWS_ABI

/* Frame layout for the non-Windows ABI.  Two STACKSIZE values appear back
   to back in this view (64, then 128).  The spill slots below reach offset
   96, so they only fit inside the 128-byte frame. */
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_M %rdi
#define OLD_N %rsi
@@ -58,10 +58,14 @@
#define STACK_INCY 24 + STACKSIZE(%rsp)
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)

/* Spill slots written once at entry and reloaded for every row chunk.
   Offset 88 is left unused: XX sits at 96. */
#define MMM 56(%rsp)
#define NN 64(%rsp)
#define AA 72(%rsp)
#define LDAX 80(%rsp)
#define XX 96(%rsp)
#else

/* Frame layout for WINDOWS_ABI.  Two STACKSIZE values appear back to back
   in this view (256, then 288).  The spill slots below reach offset 264,
   so they only fit inside the 288-byte frame. */
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_M %rcx
#define OLD_N %rdx
@@ -74,6 +78,12 @@
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
#define ALPHA 224 (%rsp)

/* Spill slots written once at entry and reloaded for every row chunk.
   A bare "#define" with no macro name used to follow XX; it was removed
   because the preprocessor rejects it whenever this branch is active. */
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)
#define XX 264(%rsp)

#endif

#define LDA %r8
@@ -137,17 +147,41 @@
movq OLD_LDA, LDA
#endif

movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER

/* Store alpha from whichever register holds it under the active ABI. */
#ifndef WINDOWS_ABI
movss %xmm0, ALPHA
#else
movss %xmm3, ALPHA
#endif


/* Spill the arguments that every row chunk reloads and load Y once. */
movq M,MMM
movq A,AA
movq N,NN
movq LDA,LDAX
movq X,XX
movq STACK_Y, Y
/* Top of the row-chunk loop: process at most 1 << 22 rows per pass. */
.L0t:
xorq I,I
addq $1,I
salq $22,I
/* I = 1 << 22.  MMM -= I sets the flags tested by the jge below; the mov
   in between does not modify flags. */
subq I,MMM
movq I,M
jge .L00t

/* Fewer than I rows were left: M = MMM + I is the remainder.  Once the
   remainder has been handled MMM stays negative, so the next pass computes
   M <= 0 and leaves through .L999x. */
movq MMM,M
addq I,M
jle .L999x

/* Reload the per-chunk arguments from their spill slots. */
.L00t:
movq AA,A
movq NN,N
movq LDAX,LDA
movq XX,X

/* INCX, INCY and LDA are reloaded on every pass because the leaq
   instructions below rescale them in place. */
movq STACK_INCX, INCX
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER

leaq (,INCX, SIZE), INCX
leaq (,INCY, SIZE), INCY
leaq (,LDA, SIZE), LDA
@@ -5990,6 +6024,12 @@
ALIGN_3

/* End of one row chunk: advance the saved A pointer by M * SIZE bytes and
   go back for the next chunk. */
.L999:
leaq (,M,SIZE),%rax
addq %rax,AA
jmp .L0t
ALIGN_4

/* All rows done: restore the registers saved in the frame. */
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12


+ 5
- 5
kernel/x86_64/sgemv_t.S View File

@@ -63,7 +63,7 @@

#else

/* Frame layout for WINDOWS_ABI.  Two STACKSIZE values appear back to back
   in this view (256, then 288). */
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_M %rcx
#define OLD_N %rdx
@@ -74,10 +74,10 @@
#define STACK_Y 72 + STACKSIZE(%rsp)
#define STACK_INCY 80 + STACKSIZE(%rsp)
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
/* Two sets of spill-slot offsets appear in this view: 216..240 and then
   232..256.  The second set ends at 256 + 8 = 264, which only fits inside
   the 288-byte frame. */
#define MMM 216(%rsp)
#define NN 224(%rsp)
#define AA 232(%rsp)
#define LDAX 240(%rsp)
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)

#endif



+ 55
- 4
kernel/x86_64/zgemv_n.S View File

@@ -42,7 +42,7 @@

#ifndef WINDOWS_ABI

/* Frame layout for the non-Windows ABI.  Two STACKSIZE values appear back
   to back in this view (64, then 128).  The spill slots below reach offset
   112, so they only fit inside the 128-byte frame. */
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
@@ -50,7 +50,15 @@
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA_R 48 (%rsp)
#define ALPHA_I 56 (%rsp)

/* Spill slots written once at entry and reloaded for every row chunk.
   ALPHAR/ALPHAI are separate from ALPHA_R/ALPHA_I above. */
#define MMM 64(%rsp)
#define NN 72(%rsp)
#define AA 80(%rsp)
#define XX 88(%rsp)
#define LDAX 96(%rsp)
#define ALPHAR 104(%rsp)
#define ALPHAI 112(%rsp)

/* Register aliases for the leading arguments. */
#define M %rdi
#define N %rsi
#define A %rcx
@@ -62,7 +70,7 @@

#else

/* Frame layout for WINDOWS_ABI.  The frame is 304 bytes: ALPHA_R/ALPHA_I
   occupy 224..239 and the seven 8-byte spill slots occupy 240..295, which
   needs more room than 288.  304 has the same 16-byte residue as 288, so
   stack alignment is unchanged.
   NOTE(review): assumes the prologue and epilogue adjust %rsp by STACKSIZE
   symbolically - confirm. */
#define STACKSIZE 256
#define STACKSIZE 304
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
@@ -75,6 +83,14 @@
#define ALPHA_R 224 (%rsp)
#define ALPHA_I 232 (%rsp)

/* Spill slots written once at entry and reloaded for every row chunk.
   They start at 240 so that MMM no longer shares offset 232 with ALPHA_I;
   with the shared offset, any store to ALPHA_I would overwrite the
   remaining-row counter. */
#define MMM 240(%rsp)
#define NN 248(%rsp)
#define AA 256(%rsp)
#define XX 264(%rsp)
#define LDAX 272(%rsp)
#define ALPHAR 280(%rsp)
#define ALPHAI 288(%rsp)

/* Register aliases for the leading arguments. */
#define M %rcx
#define N %rdx
#define A %r8
@@ -136,8 +152,37 @@
movsd OLD_ALPHA_I, %xmm1
#endif

/* Spill the incoming arguments so that every row chunk can start from the
   original values.  Y is loaded once here, before the loop. */
movq OLD_INCX, INCX
movq A, AA
movq N, NN
movq M, MMM
movq LDA, LDAX
movq X, XX
movq OLD_Y, Y
movsd %xmm0,ALPHAR
movsd %xmm1,ALPHAI

/* Top of the row-chunk loop: process at most 1 << 18 rows per pass. */
.L0t:
xorq I,I
addq $1,I
salq $18,I
/* I = 1 << 18.  MMM -= I sets the flags tested by the jge below; the mov
   and movsd instructions in between do not modify flags. */
subq I,MMM
movq I,M
movsd ALPHAR,%xmm0
movsd ALPHAI,%xmm1
jge .L00t

/* Fewer than I rows were left: M = MMM + I is the remainder.  Once the
   remainder has been handled MMM stays negative, so the next pass computes
   M <= 0 and leaves through .L999x. */
movq MMM,M
addq I,M
jle .L999x

/* Reload the per-chunk arguments from their spill slots. */
.L00t:
movq AA, A
movq NN, N
movq LDAX, LDA
movq XX, X

/* The reload of Y is commented out here, so Y keeps the value it had at
   the end of the previous chunk. */
movq OLD_INCX, INCX
# movq OLD_Y, Y
movq OLD_INCY, INCY
movq OLD_BUFFER, BUFFER

@@ -2673,6 +2718,12 @@
ALIGN_3

/* End of one row chunk: advance the saved A pointer by M << ZBASE_SHIFT
   bytes and go back for the next chunk.  Presumably ZBASE_SHIFT is log2 of
   the complex element size; it is defined outside this view. */
.L999:
movq M, I
salq $ZBASE_SHIFT,I
addq I,AA
jmp .L0t
/* All rows done: restore the registers saved in the frame. */
.L999x:

movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12


+ 48
- 2
kernel/x86_64/zgemv_t.S View File

@@ -42,13 +42,20 @@

#ifndef WINDOWS_ABI

/* Frame layout for the non-Windows ABI.  Two STACKSIZE values appear back
   to back in this view (64, then 128).  The spill slots below reach offset
   104, so they only fit inside the 128-byte frame. */
#define STACKSIZE 64
#define STACKSIZE 128
/* Arguments addressed relative to STACKSIZE(%rsp). */
#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
/* Spill slots written once at entry and reloaded for every row chunk. */
#define MMM 64(%rsp)
#define NN 72(%rsp)
#define AA 80(%rsp)
#define LDAX 88(%rsp)
#define ALPHAR 96(%rsp)
#define ALPHAI 104(%rsp)

/* Register aliases for the leading arguments. */
#define M %rdi
#define N %rsi
#define A %rcx
@@ -60,7 +67,7 @@

#else

/* Frame layout for WINDOWS_ABI.  Two STACKSIZE values appear back to back
   in this view (256, then 288).  The spill slots below reach offset 272,
   so they only fit inside the 288-byte frame. */
#define STACKSIZE 256
#define STACKSIZE 288
#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
@@ -71,6 +78,13 @@
#define OLD_INCY 88 + STACKSIZE(%rsp)
#define OLD_BUFFER 96 + STACKSIZE(%rsp)

/* Spill slots written once at entry and reloaded for every row chunk. */
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)
#define ALPHAR 264(%rsp)
#define ALPHAI 272(%rsp)

/* Register aliases for the leading arguments. */
#define M %rcx
#define N %rdx
#define A %r8
@@ -135,6 +149,32 @@
movsd OLD_ALPHA_I, %xmm1
#endif

/* Spill the incoming arguments so that every row chunk can start from the
   original values. */
movq A, AA
movq N, NN
movq M, MMM
movq LDA, LDAX
movsd %xmm0,ALPHAR
movsd %xmm1,ALPHAI

/* Top of the row-chunk loop: process at most 1 << 19 rows per pass. */
.L0t:
xorq I,I
addq $1,I
salq $19,I
/* I = 1 << 19.  MMM -= I sets the flags tested by the jge below; the mov
   and movsd instructions in between do not modify flags. */
subq I,MMM
movq I,M
movsd ALPHAR,%xmm0
movsd ALPHAI,%xmm1
jge .L00t

/* Fewer than I rows were left: M = MMM + I is the remainder.  Once the
   remainder has been handled MMM stays negative, so the next pass computes
   M <= 0 and leaves through .L999x. */
movq MMM,M
addq I,M
jle .L999x

/* Reload the per-chunk arguments from their spill slots. */
.L00t:
movq AA, A
movq NN, N
movq LDAX, LDA

/* Here Y is reloaded from OLD_Y at the start of every chunk. */
movq OLD_INCX, INCX
movq OLD_Y, Y
movq OLD_INCY, INCY
@@ -2405,6 +2445,12 @@
ALIGN_3

/* End of one row chunk: advance the saved A pointer by M << ZBASE_SHIFT
   bytes and go back for the next chunk.  Presumably ZBASE_SHIFT is log2 of
   the complex element size; it is defined outside this view. */
.L999:
movq M, I
salq $ZBASE_SHIFT,I
addq I,AA
jmp .L0t
/* All rows done: restore the registers saved in the frame. */
.L999x:

movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12


Loading…
Cancel
Save