| @@ -47,14 +47,22 @@ | |||
| #ifndef WINDOWS_ABI /* layout used when WINDOWS_ABI is not defined */ | |||
| #define STACKSIZE 64 | |||
| #define STACKSIZE 128 /* NOTE(review): two STACKSIZE values appear back to back; this looks like a diff rendering (old 64, new 128) - confirm only one is kept */ | |||
| #define OLD_INCX 8 + STACKSIZE(%rsp) /* OLD_* operands are addressed above the local frame, at STACKSIZE-relative offsets */ | |||
| #define OLD_Y 16 + STACKSIZE(%rsp) | |||
| #define OLD_INCY 24 + STACKSIZE(%rsp) | |||
| #define OLD_BUFFER 32 + STACKSIZE(%rsp) | |||
| #define ALPHA 48 (%rsp) | |||
| #define MMM 64(%rsp) /* save slots for the chunked loop, 8 bytes apart; the loop decrements MMM by the chunk size each pass */ | |||
| #define NN 72(%rsp) /* saved copy of N */ | |||
| #define AA 80(%rsp) /* saved copy of A, advanced after every chunk */ | |||
| #define XX 88(%rsp) /* saved copy of X */ | |||
| #define LDAX 96(%rsp) /* saved copy of LDA */ | |||
| #define ALPHAR 104(%rsp) /* copy of %xmm0 taken before the loop */ | |||
| #define ALPHAI 112(%rsp) /* copy of %xmm1 taken before the loop; last slot ends at 120, below STACKSIZE 128 */ | |||
| #define M %rdi | |||
| #define N %rsi | |||
| #define A %rcx | |||
| @@ -66,7 +74,7 @@ | |||
| #else /* layout used when WINDOWS_ABI is defined */ | |||
| #define STACKSIZE 256 | |||
| #define STACKSIZE 288 /* NOTE(review): 256 and 288 appear back to back; presumably old and new values from a diff - confirm */ | |||
| #define OLD_ALPHA_I 40 + STACKSIZE(%rsp) | |||
| #define OLD_A 48 + STACKSIZE(%rsp) | |||
| @@ -78,6 +86,14 @@ | |||
| #define OLD_BUFFER 96 + STACKSIZE(%rsp) | |||
| #define ALPHA 224 (%rsp) | |||
| #define MMM 232(%rsp) /* save slots for the chunked loop, 8 bytes apart, starting right after ALPHA */ | |||
| #define NN 240(%rsp) | |||
| #define AA 248(%rsp) | |||
| #define XX 256(%rsp) | |||
| #define LDAX 264(%rsp) | |||
| #define ALPHAR 272(%rsp) | |||
| #define ALPHAI 280(%rsp) /* last slot ends at 288, exactly STACKSIZE */ | |||
| #define M %rcx | |||
| #define N %rdx | |||
| #define A %r8 | |||
| @@ -142,9 +158,37 @@ | |||
| movaps %xmm3, %xmm0 | |||
| movss OLD_ALPHA_I, %xmm1 | |||
| #endif | |||
| movq A, AA /* stash the loop inputs in stack slots so every chunk can reload them */ | |||
| movq N, NN | |||
| movq M, MMM /* MMM starts as the full M and is reduced by one chunk per pass */ | |||
| movq LDA, LDAX | |||
| movq X, XX | |||
| movq OLD_Y, Y | |||
| movss %xmm0,ALPHAR /* keep the two scalars from %xmm0/%xmm1 for reloading at the top of each pass */ | |||
| movss %xmm1,ALPHAI | |||
| .L0t: /* top of the per-chunk loop */ | |||
| xorq I,I /* I = 0 */ | |||
| addq $1,I /* I = 1 */ | |||
| salq $20,I /* I = 1 << 20, the chunk size */ | |||
| subq I,MMM /* MMM -= chunk; the flags from this subtraction are consumed by the jge below */ | |||
| movq I,M /* tentatively M = one full chunk (mov does not modify flags) */ | |||
| movss ALPHAR,%xmm0 /* reload the saved scalars (movss does not modify flags) */ | |||
| movss ALPHAI,%xmm1 | |||
| jge .L00t /* MMM still >= 0 after the subtraction: run a full chunk */ | |||
| movq MMM,M | |||
| addq I,M /* M = MMM + chunk, i.e. what was left before the subtraction */ | |||
| jle .L999x /* nothing left (<= 0): leave the loop */ | |||
| .L00t: /* per-chunk setup: reload the values saved above */ | |||
| movq AA, A | |||
| movq NN, N | |||
| movq LDAX, LDA | |||
| movq XX, X | |||
| movq OLD_INCX, INCX | |||
| movq OLD_Y, Y | |||
| # movq OLD_Y, Y | |||
| movq OLD_INCY, INCY | |||
| movq OLD_BUFFER, BUFFER | |||
| @@ -4274,6 +4318,11 @@ | |||
| ALIGN_3 | |||
| .L999: /* end of one chunk */ | |||
| movq M, I | |||
| salq $ZBASE_SHIFT,I /* I = M << ZBASE_SHIFT; presumably the byte size of M elements - confirm against the ZBASE_SHIFT definition */ | |||
| addq I,AA /* advance the saved A pointer past the chunk just processed */ | |||
| jmp .L0t /* go back for the next chunk */ | |||
| .L999x: /* loop exit: reload registers from the frame */ | |||
| movq 0(%rsp), %rbx | |||
| movq 8(%rsp), %rbp | |||
| movq 16(%rsp), %r12 | |||
| @@ -47,13 +47,19 @@ | |||
| #ifndef WINDOWS_ABI /* layout used when WINDOWS_ABI is not defined */ | |||
| #define STACKSIZE 64 | |||
| #define STACKSIZE 128 /* NOTE(review): 64 and 128 appear back to back; presumably old and new values from a diff - confirm */ | |||
| #define OLD_INCX 8 + STACKSIZE(%rsp) | |||
| #define OLD_Y 16 + STACKSIZE(%rsp) | |||
| #define OLD_INCY 24 + STACKSIZE(%rsp) | |||
| #define OLD_BUFFER 32 + STACKSIZE(%rsp) | |||
| #define ALPHA 48 (%rsp) | |||
| #define MMM 64(%rsp) /* save slots for the chunked loop, 8 bytes apart; no XX slot is defined in this variant */ | |||
| #define NN 72(%rsp) | |||
| #define AA 80(%rsp) | |||
| #define LDAX 88(%rsp) | |||
| #define ALPHAR 96(%rsp) | |||
| #define ALPHAI 104(%rsp) /* last slot ends at 112, below STACKSIZE 128 */ | |||
| #define M %rdi | |||
| #define N %rsi | |||
| @@ -66,7 +72,7 @@ | |||
| #else /* layout used when WINDOWS_ABI is defined */ | |||
| #define STACKSIZE 256 | |||
| #define STACKSIZE 288 /* NOTE(review): 256 and 288 appear back to back; presumably old and new values from a diff - confirm */ | |||
| #define OLD_ALPHA_I 40 + STACKSIZE(%rsp) | |||
| #define OLD_A 48 + STACKSIZE(%rsp) | |||
| @@ -78,6 +84,13 @@ | |||
| #define OLD_BUFFER 96 + STACKSIZE(%rsp) | |||
| #define ALPHA 224 (%rsp) | |||
| #define MMM 232(%rsp) /* save slots for the chunked loop, 8 bytes apart, starting right after ALPHA */ | |||
| #define NN 240(%rsp) | |||
| #define AA 248(%rsp) | |||
| #define LDAX 256(%rsp) | |||
| #define ALPHAR 264(%rsp) | |||
| #define ALPHAI 272(%rsp) /* last slot ends at 280, below STACKSIZE 288 */ | |||
| #define M %rcx | |||
| #define N %rdx | |||
| #define A %r8 | |||
| @@ -144,6 +157,32 @@ | |||
| movss OLD_ALPHA_I, %xmm1 | |||
| #endif | |||
| movq A, AA /* stash the loop inputs in stack slots so every chunk can reload them */ | |||
| movq N, NN | |||
| movq M, MMM /* MMM starts as the full M and is reduced by one chunk per pass */ | |||
| movq LDA, LDAX | |||
| movss %xmm0,ALPHAR /* keep the two scalars from %xmm0/%xmm1 for reloading at the top of each pass */ | |||
| movss %xmm1,ALPHAI | |||
| .L0t: /* top of the per-chunk loop */ | |||
| xorq I,I | |||
| addq $1,I | |||
| salq $20,I /* I = 1 << 20, the chunk size */ | |||
| subq I,MMM /* MMM -= chunk; the flags from this subtraction are consumed by the jge below */ | |||
| movq I,M /* tentatively M = one full chunk (mov does not modify flags) */ | |||
| movss ALPHAR,%xmm0 /* reload the saved scalars (movss does not modify flags) */ | |||
| movss ALPHAI,%xmm1 | |||
| jge .L00t /* MMM still >= 0 after the subtraction: run a full chunk */ | |||
| movq MMM,M | |||
| addq I,M /* M = MMM + chunk, i.e. what was left before the subtraction */ | |||
| jle .L999x /* nothing left (<= 0): leave the loop */ | |||
| .L00t: /* per-chunk setup: reload the values saved above; X is not reloaded in this variant */ | |||
| movq AA, A | |||
| movq NN, N | |||
| movq LDAX, LDA | |||
| movq OLD_INCX, INCX | |||
| movq OLD_Y, Y | |||
| movq OLD_INCY, INCY | |||
| @@ -4350,6 +4389,11 @@ | |||
| ALIGN_3 | |||
| .L999: /* end of one chunk */ | |||
| movq M, I | |||
| salq $ZBASE_SHIFT,I /* I = M << ZBASE_SHIFT; presumably the byte size of M elements - confirm against the ZBASE_SHIFT definition */ | |||
| addq I,AA /* advance the saved A pointer past the chunk just processed */ | |||
| jmp .L0t /* go back for the next chunk */ | |||
| .L999x: /* loop exit: reload registers from the frame */ | |||
| movq 0(%rsp), %rbx | |||
| movq 8(%rsp), %rbp | |||
| movq 16(%rsp), %r12 | |||
| @@ -47,7 +47,7 @@ | |||
| #ifndef WINDOWS_ABI /* layout used when WINDOWS_ABI is not defined */ | |||
| #define STACKSIZE 64 | |||
| #define STACKSIZE 128 /* NOTE(review): 64 and 128 appear back to back; presumably old and new values from a diff - confirm */ | |||
| #define OLD_M %rdi | |||
| #define OLD_N %rsi | |||
| @@ -59,6 +59,11 @@ | |||
| #define STACK_BUFFER 32 + STACKSIZE(%rsp) | |||
| #define ALPHA 48 (%rsp) | |||
| #define MMM 56(%rsp) /* save slots for the chunked loop, 8 bytes apart, starting right after ALPHA */ | |||
| #define NN 64(%rsp) | |||
| #define AA 72(%rsp) | |||
| #define LDAX 80(%rsp) | |||
| #define XX 88(%rsp) /* last slot ends at 96, below STACKSIZE 128 */ | |||
| #else | |||
| #define STACKSIZE 256 | |||
| @@ -137,17 +142,42 @@ | |||
| movq OLD_LDA, LDA | |||
| #endif | |||
| movq STACK_INCX, INCX | |||
| movq STACK_Y, Y | |||
| movq STACK_INCY, INCY | |||
| movq STACK_BUFFER, BUFFER | |||
| #ifndef WINDOWS_ABI | |||
| movsd %xmm0, ALPHA /* the scalar arrives in %xmm0 on this path */ | |||
| #else | |||
| movsd %xmm3, ALPHA /* and in %xmm3 when WINDOWS_ABI is defined */ | |||
| #endif | |||
| movq STACK_Y, Y | |||
| movq A,AA /* stash the loop inputs in stack slots so every chunk can reload them */ | |||
| movq N,NN | |||
| movq M,MMM /* MMM starts as the full M and is reduced by one chunk per pass */ | |||
| movq LDA,LDAX | |||
| movq X,XX | |||
| .L0t: /* top of the per-chunk loop */ | |||
| xorq I,I | |||
| addq $1,I | |||
| salq $21,I /* I = 1 << 21, the chunk size */ | |||
| subq I,MMM /* MMM -= chunk; the flags from this subtraction are consumed by the jge below */ | |||
| movq I,M /* tentatively M = one full chunk (mov does not modify flags) */ | |||
| jge .L00t /* MMM still >= 0 after the subtraction: run a full chunk */ | |||
| movq MMM,M | |||
| addq I,M /* M = MMM + chunk, i.e. what was left before the subtraction */ | |||
| jle .L999x /* nothing left (<= 0): leave the loop */ | |||
| .L00t: /* per-chunk setup: reload the values saved above */ | |||
| movq XX,X | |||
| movq AA,A | |||
| movq NN,N | |||
| movq LDAX,LDA | |||
| movq STACK_INCX, INCX | |||
| movq STACK_INCY, INCY | |||
| movq STACK_BUFFER, BUFFER | |||
| leaq -1(INCY), %rax /* rax = INCY - 1 */ | |||
| leaq (,INCX, SIZE), INCX /* INCX *= SIZE */ | |||
| @@ -2815,6 +2845,12 @@ | |||
| ALIGN_3 | |||
| .L999: /* end of one chunk */ | |||
| leaq (, M, SIZE), %rax /* rax = M * SIZE */ | |||
| addq %rax,AA /* advance the saved A pointer by that amount */ | |||
| jmp .L0t /* go back for the next chunk */ | |||
| ALIGN_4 | |||
| .L999x: /* loop exit: reload registers from the frame */ | |||
| movq 0(%rsp), %rbx | |||
| movq 8(%rsp), %rbp | |||
| movq 16(%rsp), %r12 | |||
| @@ -47,7 +47,7 @@ | |||
| #ifndef WINDOWS_ABI /* layout used when WINDOWS_ABI is not defined */ | |||
| #define STACKSIZE 64 | |||
| #define STACKSIZE 128 /* NOTE(review): 64 and 128 appear back to back; presumably old and new values from a diff - confirm */ | |||
| #define OLD_M %rdi | |||
| #define OLD_N %rsi | |||
| @@ -58,10 +58,14 @@ | |||
| #define STACK_INCY 24 + STACKSIZE(%rsp) | |||
| #define STACK_BUFFER 32 + STACKSIZE(%rsp) | |||
| #define ALPHA 48 (%rsp) | |||
| #define MMM 56(%rsp) /* save slots for the chunked loop */ | |||
| #define NN 64(%rsp) | |||
| #define AA 72(%rsp) | |||
| #define LDAX 80(%rsp) | |||
| #define XX 96(%rsp) /* 88(%rsp) is not assigned to any name visible here; XX ends at 104, below STACKSIZE 128 */ | |||
| #else /* layout used when WINDOWS_ABI is defined */ | |||
| #define STACKSIZE 256 | |||
| #define STACKSIZE 288 /* NOTE(review): 256 and 288 appear back to back; presumably old and new values from a diff - confirm */ | |||
| #define OLD_M %rcx | |||
| #define OLD_N %rdx | |||
| @@ -74,6 +78,12 @@ | |||
| #define STACK_BUFFER 88 + STACKSIZE(%rsp) | |||
| #define ALPHA 224 (%rsp) | |||
| #define MMM 232(%rsp) /* save slots for the chunked loop, 8 bytes apart, starting right after ALPHA */ | |||
| #define NN 240(%rsp) | |||
| #define AA 248(%rsp) | |||
| #define LDAX 256(%rsp) | |||
| #define XX 264(%rsp) /* last slot ends at 272, below STACKSIZE 288; a stray nameless #define that followed this line was removed because it is a preprocessor error on this branch */ | |||
| #endif | |||
| #define LDA %r8 | |||
| @@ -137,17 +147,41 @@ | |||
| movq OLD_LDA, LDA | |||
| #endif | |||
| movq STACK_INCX, INCX | |||
| movq STACK_Y, Y | |||
| movq STACK_INCY, INCY | |||
| movq STACK_BUFFER, BUFFER | |||
| #ifndef WINDOWS_ABI | |||
| movss %xmm0, ALPHA /* the scalar arrives in %xmm0 on this path */ | |||
| #else | |||
| movss %xmm3, ALPHA /* and in %xmm3 when WINDOWS_ABI is defined */ | |||
| #endif | |||
| movq M,MMM /* MMM starts as the full M and is reduced by one chunk per pass */ | |||
| movq A,AA /* stash the remaining loop inputs so every chunk can reload them */ | |||
| movq N,NN | |||
| movq LDA,LDAX | |||
| movq X,XX | |||
| movq STACK_Y, Y | |||
| .L0t: /* top of the per-chunk loop */ | |||
| xorq I,I | |||
| addq $1,I | |||
| salq $22,I /* I = 1 << 22, the chunk size */ | |||
| subq I,MMM /* MMM -= chunk; the flags from this subtraction are consumed by the jge below */ | |||
| movq I,M /* tentatively M = one full chunk (mov does not modify flags) */ | |||
| jge .L00t /* MMM still >= 0 after the subtraction: run a full chunk */ | |||
| movq MMM,M | |||
| addq I,M /* M = MMM + chunk, i.e. what was left before the subtraction */ | |||
| jle .L999x /* nothing left (<= 0): leave the loop */ | |||
| .L00t: /* per-chunk setup: reload the values saved above */ | |||
| movq AA,A | |||
| movq NN,N | |||
| movq LDAX,LDA | |||
| movq XX,X | |||
| movq STACK_INCX, INCX | |||
| movq STACK_INCY, INCY | |||
| movq STACK_BUFFER, BUFFER | |||
| leaq (,INCX, SIZE), INCX /* scale INCX, INCY and LDA by SIZE */ | |||
| leaq (,INCY, SIZE), INCY | |||
| leaq (,LDA, SIZE), LDA | |||
| @@ -5990,6 +6024,12 @@ | |||
| ALIGN_3 | |||
| .L999: /* end of one chunk */ | |||
| leaq (,M,SIZE),%rax /* rax = M * SIZE */ | |||
| addq %rax,AA /* advance the saved A pointer by that amount */ | |||
| jmp .L0t /* go back for the next chunk */ | |||
| ALIGN_4 | |||
| .L999x: /* loop exit: reload registers from the frame */ | |||
| movq 0(%rsp), %rbx | |||
| movq 8(%rsp), %rbp | |||
| movq 16(%rsp), %r12 | |||
| @@ -63,7 +63,7 @@ | |||
| #else /* layout used when WINDOWS_ABI is defined */ | |||
| #define STACKSIZE 256 | |||
| #define STACKSIZE 288 /* NOTE(review): 256 and 288 appear back to back; presumably old and new values from a diff - confirm */ | |||
| #define OLD_M %rcx | |||
| #define OLD_N %rdx | |||
| @@ -74,10 +74,10 @@ | |||
| #define STACK_Y 72 + STACKSIZE(%rsp) | |||
| #define STACK_INCY 80 + STACKSIZE(%rsp) | |||
| #define STACK_BUFFER 88 + STACKSIZE(%rsp) | |||
| #define MMM 216(%rsp) /* NOTE(review): MMM..LDAX are listed twice (216.. and 232..); presumably the 216-based set is the old side of the diff - confirm */ | |||
| #define NN 224(%rsp) | |||
| #define AA 232(%rsp) | |||
| #define LDAX 240(%rsp) | |||
| #define MMM 232(%rsp) /* second set: each slot sits 16 bytes higher than in the first set */ | |||
| #define NN 240(%rsp) | |||
| #define AA 248(%rsp) | |||
| #define LDAX 256(%rsp) /* ends at 264, below STACKSIZE 288 */ | |||
| #endif | |||
| @@ -42,7 +42,7 @@ | |||
| #ifndef WINDOWS_ABI /* layout used when WINDOWS_ABI is not defined */ | |||
| #define STACKSIZE 64 | |||
| #define STACKSIZE 128 /* NOTE(review): 64 and 128 appear back to back; presumably old and new values from a diff - confirm */ | |||
| #define OLD_INCX 8 + STACKSIZE(%rsp) | |||
| #define OLD_Y 16 + STACKSIZE(%rsp) | |||
| @@ -50,7 +50,15 @@ | |||
| #define OLD_BUFFER 32 + STACKSIZE(%rsp) | |||
| #define ALPHA_R 48 (%rsp) | |||
| #define ALPHA_I 56 (%rsp) | |||
| #define MMM 64(%rsp) /* save slots for the chunked loop, 8 bytes apart, starting right after ALPHA_I */ | |||
| #define NN 72(%rsp) | |||
| #define AA 80(%rsp) | |||
| #define XX 88(%rsp) | |||
| #define LDAX 96(%rsp) | |||
| #define ALPHAR 104(%rsp) /* separate slots from ALPHA_R/ALPHA_I above; written from %xmm0/%xmm1 before the loop */ | |||
| #define ALPHAI 112(%rsp) /* last slot ends at 120, below STACKSIZE 128 */ | |||
| #define M %rdi | |||
| #define N %rsi | |||
| #define A %rcx | |||
| @@ -62,7 +70,7 @@ | |||
| #else /* layout used when WINDOWS_ABI is defined */ | |||
| #define STACKSIZE 256 | |||
| #define STACKSIZE 304 /* grown by 16 (keeps the same 16-byte stack phase) so the loop save slots no longer have to share storage with ALPHA_I */ | |||
| #define OLD_ALPHA_I 40 + STACKSIZE(%rsp) | |||
| #define OLD_A 48 + STACKSIZE(%rsp) | |||
| @@ -75,6 +83,14 @@ | |||
| #define ALPHA_R 224 (%rsp) | |||
| #define ALPHA_I 232 (%rsp) /* occupies 232..239 */ | |||
| #define MMM 240(%rsp) /* was 232(%rsp), the same slot as ALPHA_I; every loop slot is moved up by 8 so a store to ALPHA_I cannot overwrite the remaining-row counter */ | |||
| #define NN 248(%rsp) | |||
| #define AA 256(%rsp) | |||
| #define XX 264(%rsp) | |||
| #define LDAX 272(%rsp) | |||
| #define ALPHAR 280(%rsp) | |||
| #define ALPHAI 288(%rsp) /* last slot ends at 296, below STACKSIZE 304 */ | |||
| #define M %rcx | |||
| #define N %rdx | |||
| #define A %r8 | |||
| @@ -136,8 +152,37 @@ | |||
| movsd OLD_ALPHA_I, %xmm1 | |||
| #endif | |||
| movq OLD_INCX, INCX | |||
| movq A, AA /* stash the loop inputs in stack slots so every chunk can reload them */ | |||
| movq N, NN | |||
| movq M, MMM /* MMM starts as the full M and is reduced by one chunk per pass */ | |||
| movq LDA, LDAX | |||
| movq X, XX | |||
| movq OLD_Y, Y /* Y is loaded once here; the per-chunk reload further down is commented out */ | |||
| movsd %xmm0,ALPHAR /* keep the two scalars from %xmm0/%xmm1 for reloading at the top of each pass */ | |||
| movsd %xmm1,ALPHAI | |||
| .L0t: /* top of the per-chunk loop */ | |||
| xorq I,I | |||
| addq $1,I | |||
| salq $18,I /* I = 1 << 18, the chunk size */ | |||
| subq I,MMM /* MMM -= chunk; the flags from this subtraction are consumed by the jge below */ | |||
| movq I,M /* tentatively M = one full chunk (mov does not modify flags) */ | |||
| movsd ALPHAR,%xmm0 /* reload the saved scalars (movsd does not modify flags) */ | |||
| movsd ALPHAI,%xmm1 | |||
| jge .L00t /* MMM still >= 0 after the subtraction: run a full chunk */ | |||
| movq MMM,M | |||
| addq I,M /* M = MMM + chunk, i.e. what was left before the subtraction */ | |||
| jle .L999x /* nothing left (<= 0): leave the loop */ | |||
| .L00t: /* per-chunk setup: reload the values saved above */ | |||
| movq AA, A | |||
| movq NN, N | |||
| movq LDAX, LDA | |||
| movq XX, X | |||
| movq OLD_INCX, INCX | |||
| # movq OLD_Y, Y | |||
| movq OLD_INCY, INCY | |||
| movq OLD_BUFFER, BUFFER | |||
| @@ -2673,6 +2718,12 @@ | |||
| ALIGN_3 | |||
| .L999: /* end of one chunk */ | |||
| movq M, I | |||
| salq $ZBASE_SHIFT,I /* I = M << ZBASE_SHIFT; presumably the byte size of M elements - confirm against the ZBASE_SHIFT definition */ | |||
| addq I,AA /* advance the saved A pointer past the chunk just processed */ | |||
| jmp .L0t /* go back for the next chunk */ | |||
| .L999x: /* loop exit: reload registers from the frame */ | |||
| movq 0(%rsp), %rbx | |||
| movq 8(%rsp), %rbp | |||
| movq 16(%rsp), %r12 | |||
| @@ -42,13 +42,20 @@ | |||
| #ifndef WINDOWS_ABI /* layout used when WINDOWS_ABI is not defined */ | |||
| #define STACKSIZE 64 | |||
| #define STACKSIZE 128 /* NOTE(review): 64 and 128 appear back to back; presumably old and new values from a diff - confirm */ | |||
| #define OLD_INCX 8 + STACKSIZE(%rsp) | |||
| #define OLD_Y 16 + STACKSIZE(%rsp) | |||
| #define OLD_INCY 24 + STACKSIZE(%rsp) | |||
| #define OLD_BUFFER 32 + STACKSIZE(%rsp) | |||
| #define MMM 64(%rsp) /* save slots for the chunked loop, 8 bytes apart; no XX slot is defined in this variant */ | |||
| #define NN 72(%rsp) | |||
| #define AA 80(%rsp) | |||
| #define LDAX 88(%rsp) | |||
| #define ALPHAR 96(%rsp) | |||
| #define ALPHAI 104(%rsp) /* last slot ends at 112, below STACKSIZE 128 */ | |||
| #define M %rdi | |||
| #define N %rsi | |||
| #define A %rcx | |||
| @@ -60,7 +67,7 @@ | |||
| #else /* layout used when WINDOWS_ABI is defined */ | |||
| #define STACKSIZE 256 | |||
| #define STACKSIZE 288 /* NOTE(review): 256 and 288 appear back to back; presumably old and new values from a diff - confirm */ | |||
| #define OLD_ALPHA_I 40 + STACKSIZE(%rsp) | |||
| #define OLD_A 48 + STACKSIZE(%rsp) | |||
| @@ -71,6 +78,13 @@ | |||
| #define OLD_INCY 88 + STACKSIZE(%rsp) | |||
| #define OLD_BUFFER 96 + STACKSIZE(%rsp) | |||
| #define MMM 232(%rsp) /* save slots for the chunked loop, 8 bytes apart */ | |||
| #define NN 240(%rsp) | |||
| #define AA 248(%rsp) | |||
| #define LDAX 256(%rsp) | |||
| #define ALPHAR 264(%rsp) | |||
| #define ALPHAI 272(%rsp) /* last slot ends at 280, below STACKSIZE 288 */ | |||
| #define M %rcx | |||
| #define N %rdx | |||
| #define A %r8 | |||
| @@ -135,6 +149,32 @@ | |||
| movsd OLD_ALPHA_I, %xmm1 | |||
| #endif | |||
| movq A, AA /* stash the loop inputs in stack slots so every chunk can reload them */ | |||
| movq N, NN | |||
| movq M, MMM /* MMM starts as the full M and is reduced by one chunk per pass */ | |||
| movq LDA, LDAX | |||
| movsd %xmm0,ALPHAR /* keep the two scalars from %xmm0/%xmm1 for reloading at the top of each pass */ | |||
| movsd %xmm1,ALPHAI | |||
| .L0t: /* top of the per-chunk loop */ | |||
| xorq I,I | |||
| addq $1,I | |||
| salq $19,I /* I = 1 << 19, the chunk size */ | |||
| subq I,MMM /* MMM -= chunk; the flags from this subtraction are consumed by the jge below */ | |||
| movq I,M /* tentatively M = one full chunk (mov does not modify flags) */ | |||
| movsd ALPHAR,%xmm0 /* reload the saved scalars (movsd does not modify flags) */ | |||
| movsd ALPHAI,%xmm1 | |||
| jge .L00t /* MMM still >= 0 after the subtraction: run a full chunk */ | |||
| movq MMM,M | |||
| addq I,M /* M = MMM + chunk, i.e. what was left before the subtraction */ | |||
| jle .L999x /* nothing left (<= 0): leave the loop */ | |||
| .L00t: /* per-chunk setup: reload the values saved above; X is not reloaded in this variant */ | |||
| movq AA, A | |||
| movq NN, N | |||
| movq LDAX, LDA | |||
| movq OLD_INCX, INCX | |||
| movq OLD_Y, Y | |||
| movq OLD_INCY, INCY | |||
| @@ -2405,6 +2445,12 @@ | |||
| ALIGN_3 | |||
| .L999: /* end of one chunk */ | |||
| movq M, I | |||
| salq $ZBASE_SHIFT,I /* I = M << ZBASE_SHIFT; presumably the byte size of M elements - confirm against the ZBASE_SHIFT definition */ | |||
| addq I,AA /* advance the saved A pointer past the chunk just processed */ | |||
| jmp .L0t /* go back for the next chunk */ | |||
| .L999x: /* loop exit: reload registers from the frame */ | |||
| movq 0(%rsp), %rbx | |||
| movq 8(%rsp), %rbp | |||
| movq 16(%rsp), %r12 | |||