| @@ -89,17 +89,23 @@ | |||||
| #endif | #endif | ||||
| #define STACKSIZE 16 | #define STACKSIZE 16 | ||||
| #define M 4 + STACKSIZE(%esp) | |||||
| #define N 8 + STACKSIZE(%esp) | |||||
| #define ALPHA 16 + STACKSIZE(%esp) | |||||
| #define A 20 + STACKSIZE(%esp) | |||||
| #define STACK_LDA 24 + STACKSIZE(%esp) | |||||
| #define STACK_X 28 + STACKSIZE(%esp) | |||||
| #define STACK_INCX 32 + STACKSIZE(%esp) | |||||
| #define Y 36 + STACKSIZE(%esp) | |||||
| #define STACK_INCY 40 + STACKSIZE(%esp) | |||||
| #define BUFFER 44 + STACKSIZE(%esp) | |||||
| /* Frame layout used below: subl $ARGS,%esp reserves ARGS bytes of scratch and the register pushes take STACKSIZE bytes, so the caller arguments sit ARGS bytes further from %esp, and the scratch slots MMM, NN, AA and LDAX occupy STACKSIZE+0 .. STACKSIZE+15 from %esp. */ | |||||
| #define ARGS 16 | |||||
| #define M 4 + STACKSIZE+ARGS(%esp) | |||||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||||
| #define ALPHA 16 + STACKSIZE+ARGS(%esp) | |||||
| #define A 20 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_LDA 24 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_X 28 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCX 32 + STACKSIZE+ARGS(%esp) | |||||
| #define Y 36 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCY 40 + STACKSIZE+ARGS(%esp) | |||||
| #define BUFFER 44 + STACKSIZE+ARGS(%esp) | |||||
| #define MMM 0+STACKSIZE(%esp) | |||||
| #define NN 4+STACKSIZE(%esp) | |||||
| #define AA 8+STACKSIZE(%esp) | |||||
| #define LDAX 12+STACKSIZE(%esp) | |||||
| #define I %eax | #define I %eax | ||||
| #define J %ebx | #define J %ebx | ||||
| @@ -114,6 +120,7 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| subl $ARGS,%esp | |||||
| pushl %ebp | pushl %ebp | ||||
| pushl %edi | pushl %edi | ||||
| pushl %esi | pushl %esi | ||||
| @@ -122,6 +129,37 @@ | |||||
| PROFCODE | PROFCODE | ||||
| movl STACK_LDA, LDA | movl STACK_LDA, LDA | ||||
| movl LDA,LDAX # backup LDA | |||||
| movl N,J | |||||
| movl J,NN # backup N | |||||
| movl A,J | |||||
| movl J,AA # backup A | |||||
| movl M,J | |||||
| movl J,MMM # mov M to MMM | |||||
| .L0t: # size the next pass over the M dimension | |||||
| xorl J,J | |||||
| addl $1,J | |||||
| sall $23,J # J = 1 << 23 = 2^23 (shift count is 23, not 22) | |||||
| subl J,MMM # MMM=MMM-J; jge below tests the flags set here | |||||
| movl J,M # M = J; movl leaves the subl flags untouched | |||||
| jge .L00t # result >= 0: run a full pass of J | |||||
| ALIGN_4 | |||||
| movl MMM,%eax | |||||
| addl J,%eax # eax = MMM + J, the count that was left before the subl | |||||
| jle .L999x # <= 0: nothing left, leave the loop | |||||
| movl %eax,M # final, shorter pass: M = remaining count | |||||
| .L00t: | |||||
| movl AA,%eax | |||||
| movl %eax,A # mov AA to A | |||||
| movl NN,%eax | |||||
| movl %eax,N # reset N | |||||
| movl LDAX, LDA # reset LDA | |||||
| movl STACK_X, X | movl STACK_X, X | ||||
| movl STACK_INCX, INCX | movl STACK_INCX, INCX | ||||
| movl STACK_INCY, INCY | movl STACK_INCY, INCY | ||||
| @@ -628,10 +666,19 @@ | |||||
| ALIGN_4 | ALIGN_4 | ||||
| .L999: | .L999: | ||||
| /* NOTE(review): only AA is advanced here; no matching adjustment of the X or Y vector pointers is visible in this chunk - confirm it is handled elsewhere. */ | |||||
| movl M,J # J = M, the per-pass count stored at .L0t | |||||
| leal (,J,SIZE),%eax # eax = J * SIZE | |||||
| addl %eax,AA # advance the saved matrix pointer AA by that amount | |||||
| jmp .L0t # go back and size the next pass | |||||
| ALIGN_4 | |||||
| .L999x: # all passes done: restore registers and return | |||||
| popl %ebx | popl %ebx | ||||
| popl %esi | popl %esi | ||||
| popl %edi | popl %edi | ||||
| popl %ebp | popl %ebp | ||||
| addl $ARGS,%esp # release the scratch area reserved by subl $ARGS,%esp | |||||
| ret | ret | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -76,18 +76,24 @@ | |||||
| #endif | #endif | ||||
| #define STACKSIZE 16 | #define STACKSIZE 16 | ||||
| /* Frame layout used below: subl $ARGS,%esp reserves ARGS bytes of scratch and the register pushes take STACKSIZE bytes, so the caller arguments sit ARGS bytes further from %esp, and the scratch slots MMM, AA, LDAX and NN occupy STACKSIZE+0 .. STACKSIZE+15 from %esp. */ | |||||
| #define ARGS 16 | |||||
| #define M 4 + STACKSIZE+ARGS(%esp) | |||||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||||
| #define ALPHA 16 + STACKSIZE+ARGS(%esp) | |||||
| #define A 24 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_LDA 28 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_X 32 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCX 36 + STACKSIZE+ARGS(%esp) | |||||
| #define Y 40 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCY 44 + STACKSIZE+ARGS(%esp) | |||||
| #define BUFFER 48 + STACKSIZE+ARGS(%esp) | |||||
| #define MMM 0+STACKSIZE(%esp) | |||||
| #define AA 4+STACKSIZE(%esp) | |||||
| #define LDAX 8+STACKSIZE(%esp) | |||||
| #define NN 12+STACKSIZE(%esp) | |||||
| #define M 4 + STACKSIZE(%esp) | |||||
| #define N 8 + STACKSIZE(%esp) | |||||
| #define ALPHA 16 + STACKSIZE(%esp) | |||||
| #define A 24 + STACKSIZE(%esp) | |||||
| #define STACK_LDA 28 + STACKSIZE(%esp) | |||||
| #define STACK_X 32 + STACKSIZE(%esp) | |||||
| #define STACK_INCX 36 + STACKSIZE(%esp) | |||||
| #define Y 40 + STACKSIZE(%esp) | |||||
| #define STACK_INCY 44 + STACKSIZE(%esp) | |||||
| #define BUFFER 48 + STACKSIZE(%esp) | |||||
| #define I %eax | #define I %eax | ||||
| #define J %ebx | #define J %ebx | ||||
| @@ -101,6 +107,8 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| subl $ARGS,%esp | |||||
| pushl %ebp | pushl %ebp | ||||
| pushl %edi | pushl %edi | ||||
| pushl %esi | pushl %esi | ||||
| @@ -108,7 +116,38 @@ | |||||
| PROFCODE | PROFCODE | ||||
| movl STACK_LDA, LDA | movl STACK_LDA, LDA | ||||
| movl LDA,LDAX # backup LDA | |||||
| movl N,J | |||||
| movl J,NN # backup N | |||||
| movl A,J | |||||
| movl J,AA # backup A | |||||
| movl M,J | |||||
| movl J,MMM # mov M to MMM | |||||
| .L0t: # size the next pass over the M dimension | |||||
| xorl J,J | |||||
| addl $1,J | |||||
| sall $22,J # J=2^22 | |||||
| subl J,MMM # MMM=MMM-J; jge below tests the flags set here | |||||
| movl J,M # M = J; movl leaves the subl flags untouched | |||||
| jge .L00t # result >= 0: run a full pass of J | |||||
| ALIGN_4 | |||||
| movl MMM,%eax | |||||
| addl J,%eax # eax = MMM + J, the count that was left before the subl | |||||
| jle .L999x # <= 0: nothing left, leave the loop | |||||
| movl %eax,M # final, shorter pass: M = remaining count | |||||
| .L00t: | |||||
| movl AA,%eax | |||||
| movl %eax,A # mov AA to A | |||||
| movl NN,%eax | |||||
| movl %eax,N # reset N | |||||
| movl LDAX, LDA # reset LDA | |||||
| movl STACK_X, X | movl STACK_X, X | ||||
| movl STACK_INCX, INCX | movl STACK_INCX, INCX | ||||
| movl STACK_INCY, INCY | movl STACK_INCY, INCY | ||||
| @@ -117,6 +156,7 @@ | |||||
| leal (,INCY, SIZE), INCY | leal (,INCY, SIZE), INCY | ||||
| leal (,LDA, SIZE), LDA | leal (,LDA, SIZE), LDA | ||||
| subl $-16 * SIZE, A | subl $-16 * SIZE, A | ||||
| cmpl $0, N | cmpl $0, N | ||||
| @@ -560,10 +600,19 @@ | |||||
| ALIGN_4 | ALIGN_4 | ||||
| .L999: | .L999: | ||||
| /* NOTE(review): only AA is advanced here; no matching adjustment of the X or Y vector pointers is visible in this chunk - confirm it is handled elsewhere. */ | |||||
| movl M,J # J = M, the per-pass count stored at .L0t | |||||
| leal (,J,SIZE),%eax # eax = J * SIZE | |||||
| addl %eax,AA # advance the saved matrix pointer AA by that amount | |||||
| jmp .L0t # go back and size the next pass | |||||
| ALIGN_4 | |||||
| .L999x: # all passes done: restore registers and return | |||||
| popl %ebx | popl %ebx | ||||
| popl %esi | popl %esi | ||||
| popl %edi | popl %edi | ||||
| popl %ebp | popl %ebp | ||||
| addl $ARGS,%esp # release the scratch area reserved by subl $ARGS,%esp | |||||
| ret | ret | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -47,7 +47,7 @@ | |||||
| #ifndef WINDOWS_ABI | #ifndef WINDOWS_ABI | ||||
| #define STACKSIZE 64 | |||||
| #define STACKSIZE 128 | |||||
| #define OLD_M %rdi | #define OLD_M %rdi | ||||
| #define OLD_N %rsi | #define OLD_N %rsi | ||||
| @@ -57,6 +57,10 @@ | |||||
| #define STACK_Y 16 + STACKSIZE(%rsp) | #define STACK_Y 16 + STACKSIZE(%rsp) | ||||
| #define STACK_INCY 24 + STACKSIZE(%rsp) | #define STACK_INCY 24 + STACKSIZE(%rsp) | ||||
| #define STACK_BUFFER 32 + STACKSIZE(%rsp) | #define STACK_BUFFER 32 + STACKSIZE(%rsp) | ||||
| #define MMM 56(%rsp) | |||||
| #define NN 64(%rsp) | |||||
| #define AA 72(%rsp) | |||||
| #define LDAX 80(%rsp) | |||||
| #else | #else | ||||
| @@ -71,6 +75,10 @@ | |||||
| #define STACK_Y 72 + STACKSIZE(%rsp) | #define STACK_Y 72 + STACKSIZE(%rsp) | ||||
| #define STACK_INCY 80 + STACKSIZE(%rsp) | #define STACK_INCY 80 + STACKSIZE(%rsp) | ||||
| #define STACK_BUFFER 88 + STACKSIZE(%rsp) | #define STACK_BUFFER 88 + STACKSIZE(%rsp) | ||||
| /* Scratch slots for the pass loop: MMM = count still to process, NN/AA/LDAX = saved copies of N, A and LDA. NOTE(review): movups %xmm15, 208(%rsp) stores bytes 208-223, so if that save lives in this same frame the 8-byte MMM slot at 216(%rsp) overlaps it - confirm and move the slots (and grow STACKSIZE if needed). */ | |||||
| #define MMM 216(%rsp) | |||||
| #define NN 224(%rsp) | |||||
| #define AA 232(%rsp) | |||||
| #define LDAX 240(%rsp) | |||||
| #endif | #endif | ||||
| @@ -127,29 +135,46 @@ | |||||
| movups %xmm14, 192(%rsp) | movups %xmm14, 192(%rsp) | ||||
| movups %xmm15, 208(%rsp) | movups %xmm15, 208(%rsp) | ||||
| movq OLD_M, M | |||||
| movq OLD_N, N | |||||
| movq OLD_A, A | |||||
| movq OLD_LDA, LDA | |||||
| movq OLD_M, MMM | |||||
| movq OLD_N, NN | |||||
| movq OLD_A, AA | |||||
| movq OLD_LDA, LDAX | |||||
| movq OLD_X, X | movq OLD_X, X | ||||
| #else | #else | ||||
| movq OLD_M, M | |||||
| movq OLD_N, N | |||||
| movq OLD_A, A | |||||
| movq OLD_LDA, LDA | |||||
| movq OLD_M, MMM | |||||
| movq OLD_N, NN | |||||
| movq OLD_A, AA | |||||
| movq OLD_LDA, LDAX | |||||
| #endif | #endif | ||||
| movq STACK_INCX, INCX | |||||
| movq STACK_Y, Y | |||||
| movq STACK_INCY, INCY | |||||
| movq STACK_BUFFER, BUFFER | |||||
| #ifndef WINDOWS_ABI | #ifndef WINDOWS_ABI | ||||
| pshufd $0, %xmm0, ALPHA | pshufd $0, %xmm0, ALPHA | ||||
| #else | #else | ||||
| pshufd $0, %xmm3, ALPHA | pshufd $0, %xmm3, ALPHA | ||||
| #endif | #endif | ||||
| .L0t: # size the next pass over the M dimension | |||||
| xorq M,M | |||||
| addq $1,M | |||||
| salq $22,M # M = 1 << 22, built by the xorq/addq/salq sequence | |||||
| subq M,MMM # MMM = MMM - M; jge below tests the flags set here | |||||
| jge .L00t # result >= 0: run a full pass of 1 << 22 | |||||
| ALIGN_4 | |||||
| movq MMM,%rax | |||||
| addq M,%rax # rax = MMM + M, the count that was left before the subq | |||||
| jle .L999x # <= 0: nothing left, leave the loop | |||||
| movq %rax,M # final, shorter pass: M = remaining count | |||||
| .L00t: | |||||
| movq LDAX,LDA | |||||
| movq NN,N | |||||
| movq AA,A | |||||
| movq STACK_INCX, INCX | |||||
| movq STACK_Y, Y | |||||
| movq STACK_INCY, INCY | |||||
| movq STACK_BUFFER, BUFFER | |||||
| leaq (,INCX, SIZE), INCX | leaq (,INCX, SIZE), INCX | ||||
| leaq (,INCY, SIZE), INCY | leaq (,INCY, SIZE), INCY | ||||
| leaq (,LDA, SIZE), LDA | leaq (,LDA, SIZE), LDA | ||||
| @@ -6341,6 +6366,12 @@ | |||||
| ALIGN_4 | ALIGN_4 | ||||
| .L999: | .L999: | ||||
| /* NOTE(review): only AA is advanced here; no matching adjustment of the X or Y vector pointers is visible in this chunk - confirm it is handled elsewhere. */ | |||||
| leaq (,M,SIZE),%rax # rax = M * SIZE | |||||
| addq %rax,AA # advance the saved matrix pointer AA by that amount | |||||
| jmp .L0t # go back and size the next pass | |||||
| ALIGN_4 | |||||
| .L999x: | |||||
| movq 0(%rsp), %rbx | movq 0(%rsp), %rbx | ||||
| movq 8(%rsp), %rbp | movq 8(%rsp), %rbp | ||||
| movq 16(%rsp), %r12 | movq 16(%rsp), %r12 | ||||