| @@ -101,10 +101,10 @@ | |||
# Operand macros addressed off %esp. Y, STACK_INCY and BUFFER sit at a fixed
# offset above STACKSIZE+ARGS.
| #define Y 36 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_INCY 40 + STACKSIZE+ARGS(%esp) | |||
| #define BUFFER 44 + STACKSIZE+ARGS(%esp) | |||
# MMM, YY, AA and LDAX are four consecutive 4-byte slots starting at ARGS(%esp).
# Code further down this file copies YY into Y and adds offsets to AA and YY.
| #define MMM 0+ARGS(%esp) | |||
| #define YY 4+ARGS(%esp) | |||
| #define AA 8+ARGS(%esp) | |||
| #define LDAX 12+ARGS(%esp) | |||
# I and J are aliases for the %eax and %ebx registers.
| #define I %eax | |||
| #define J %ebx | |||
| @@ -153,8 +153,8 @@ | |||
# Copy the YY slot into the Y slot. Both are memory operands, so the value
# goes through J.
| movl YY,J | |||
| movl J,Y | |||
# NOTE(review): the STACK_LDA load appears twice. This excerpt comes from a
# diff whose +/- column is missing, so presumably these are the old and new
# copies of a single line - confirm against the real file.
| movl STACK_LDA, LDA | |||
| movl STACK_LDA, LDA | |||
# X and INCX (like LDA) are macros defined outside this excerpt.
| movl STACK_X, X | |||
| movl STACK_INCX, INCX | |||
| @@ -688,9 +688,9 @@ | |||
# Tail of one pass: eax = M * SIZE, then AA += eax. SIZE is defined outside
# this excerpt (presumably the element size in bytes).
| movl M,J | |||
| leal (,J,SIZE),%eax | |||
| addl %eax,AA | |||
# NOTE(review): YY is advanced by two different sequences below. The first
# adds eax as is (through J); the second first multiplies eax by INCY. This
# excerpt comes from a diff without +/- markers, so presumably only one of
# them exists in the real file - TODO confirm which.
| movl YY,J | |||
| addl %eax,J | |||
| movl J,YY | |||
| movl STACK_INCY,INCY | |||
| imull INCY,%eax | |||
| addl %eax,YY | |||
# Back to the .L0t label for the next pass.
| jmp .L0t | |||
| ALIGN_4 | |||
| @@ -714,9 +714,9 @@ | |||
# Tail of one pass: eax = M * SIZE, then AA += eax. SIZE is defined outside
# this excerpt (presumably the element size in bytes).
| movl M,J | |||
| leal (,J,SIZE),%eax | |||
| addl %eax,AA | |||
# NOTE(review): YY is advanced by two different sequences below. The first
# adds eax as is (through J); the second first multiplies eax by INCY. This
# excerpt comes from a diff without +/- markers, so presumably only one of
# them exists in the real file - TODO confirm which.
| movl YY,J | |||
| addl %eax,J | |||
| movl J,YY | |||
| movl STACK_INCY,INCY | |||
| imull INCY,%eax | |||
| addl %eax,YY | |||
# Back to the .L0t label for the next pass.
| jmp .L0t | |||
| ALIGN_4 | |||
| @@ -102,11 +102,9 @@ | |||
# Argument slots, addressed above STACKSIZE+ARGS from %esp.
| #define STACK_INCY 40 + STACKSIZE+ARGS(%esp) | |||
| #define BUFFER 44 + STACKSIZE+ARGS(%esp) | |||
# NOTE(review): MMM, AA and XX are each defined twice below, once relative to
# STACKSIZE and once relative to ARGS. This excerpt comes from a diff without
# +/- markers, so presumably one group is the replaced version - TODO confirm.
# NN and LDAX exist only in the STACKSIZE-relative group.
| #define MMM 0+STACKSIZE(%esp) | |||
| #define NN 4+STACKSIZE(%esp) | |||
| #define AA 8+STACKSIZE(%esp) | |||
| #define LDAX 12+STACKSIZE(%esp) | |||
| #define XX 16+STACKSIZE(%esp) | |||
| #define MMM 0+ARGS(%esp) | |||
| #define AA 4+ARGS(%esp) | |||
| #define XX 8+ARGS(%esp) | |||
# I and J are aliases for the %eax and %ebx registers.
| #define I %eax | |||
| #define J %ebx | |||
| @@ -129,12 +127,8 @@ | |||
# Entry setup: load LDA and X from their stack slots and keep copies of LDA,
# X, N and A in the LDAX, XX, NN and AA slots. N and A are staged through J;
# LDA and X are stored directly (their macros are outside this excerpt,
# presumably registers).
# NOTE(review): diff excerpt without +/- markers; some of these lines may be
# the removed version - confirm against the real file.
| PROFCODE | |||
| movl STACK_LDA, LDA | |||
| movl LDA,LDAX # backup LDA | |||
| movl STACK_X, X | |||
| movl X,XX | |||
| movl N,J | |||
| movl J,NN # backup N | |||
| movl A,J | |||
| movl J,AA # backup A | |||
# J = M; the instruction that consumes it lies beyond this excerpt.
| movl M,J | |||
| @@ -144,7 +138,6 @@ | |||
# Build the per-pass block size in J. The figures in the comments below hold
# only when J was zero before the addl - that instruction is outside this
# excerpt, TODO confirm.
| addl $1,J | |||
| sall $22,J # J=2^22 (floats); 2^22*sizeof(float)=2^24 bytes=buffer size(16MB) | |||
| subl $8, J # Don't use last 8 float in the buffer. | |||
| # Now, split M by block J | |||
| subl J,MMM # MMM=MMM-J | |||
# movl leaves the flags alone, so the jge tests the result of the subl above:
# a non-negative MMM jumps to .L00t with M = J.
| movl J,M | |||
| jge .L00t | |||
| @@ -159,13 +152,10 @@ | |||
# Per-pass reload of A, N, LDA and X from the saved copies.
# NOTE(review): X is loaded from XX in two forms (direct and through eax) and
# LDA from both LDAX and STACK_LDA. This excerpt comes from a diff without
# +/- markers, so presumably old and new variants are both shown - TODO
# confirm which lines the real file keeps.
| movl AA,%eax | |||
| movl %eax,A # mov AA to A | |||
| movl NN,%eax | |||
| movl %eax,N # reset N | |||
| movl LDAX, LDA # reset LDA | |||
| movl XX,X | |||
| movl XX,%eax | |||
| movl %eax,X | |||
| movl STACK_LDA, LDA | |||
| movl STACK_INCX, INCX | |||
| movl STACK_INCY, INCY | |||
| @@ -688,9 +678,9 @@ | |||
# Tail of one pass: eax = M * SIZE, then AA += eax. SIZE is defined outside
# this excerpt (presumably the element size in bytes).
| movl M,J | |||
| leal (,J,SIZE),%eax | |||
| addl %eax,AA | |||
# NOTE(review): XX is advanced by two different sequences below. The first
# adds eax as is (through J); the second first multiplies eax by INCX. This
# excerpt comes from a diff without +/- markers, so presumably only one of
# them exists in the real file - TODO confirm which.
| movl XX,J | |||
| addl %eax,J | |||
| movl J,XX | |||
| movl STACK_INCX,INCX | |||
| imull INCX,%eax | |||
| addl %eax,XX | |||
# Back to the .L0t label for the next pass.
| jmp .L0t | |||
| ALIGN_4 | |||
| @@ -76,7 +76,7 @@ | |||
| #endif | |||
| #define STACKSIZE 16 | |||
# NOTE(review): ARGS is defined twice with different values (16, then 20).
# This excerpt comes from a diff without +/- markers, so presumably one is
# the old value and the other its replacement - TODO confirm.
| #define ARGS 16 | |||
| #define ARGS 20 | |||
# Argument slots are addressed above STACKSIZE+ARGS from %esp.
| #define M 4 + STACKSIZE+ARGS(%esp) | |||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||
| @@ -89,10 +89,9 @@ | |||
# Argument slots, addressed above STACKSIZE+ARGS from %esp.
| #define STACK_INCY 44 + STACKSIZE+ARGS(%esp) | |||
| #define BUFFER 48 + STACKSIZE+ARGS(%esp) | |||
# NOTE(review): MMM and AA are each defined twice below, once relative to
# STACKSIZE and once relative to ARGS. This excerpt comes from a diff without
# +/- markers, so presumably one group is the replaced version - TODO confirm.
# LDAX and NN exist only in the STACKSIZE group, XX only in the ARGS group.
| #define MMM 0+STACKSIZE(%esp) | |||
| #define AA 4+STACKSIZE(%esp) | |||
| #define LDAX 8+STACKSIZE(%esp) | |||
| #define NN 12+STACKSIZE(%esp) | |||
| #define MMM 0+ARGS(%esp) | |||
| #define AA 4+ARGS(%esp) | |||
| #define XX 8+ARGS(%esp) | |||
# I and J are aliases for the %eax and %ebx registers.
| #define I %eax | |||
| #define J %ebx | |||
| @@ -117,10 +116,8 @@ | |||
# Entry setup: load LDA and X from their stack slots and keep copies of LDA,
# N, X and A in the LDAX, NN, XX and AA slots. N and A are staged through J;
# LDA and X are stored directly (their macros are outside this excerpt,
# presumably registers).
# NOTE(review): diff excerpt without +/- markers; some of these lines may be
# the removed version - confirm against the real file.
| PROFCODE | |||
| movl STACK_LDA, LDA | |||
| movl LDA,LDAX # backup LDA | |||
| movl N,J | |||
| movl J,NN # backup N | |||
| movl STACK_X, X | |||
| movl X,XX | |||
| movl A,J | |||
| movl J,AA # backup A | |||
# J = M; the instruction that consumes it lies beyond this excerpt.
| movl M,J | |||
| @@ -130,7 +127,6 @@ | |||
# Build the per-pass block size in J. The figures in the comments below hold
# only when J was zero before the addl - that instruction is outside this
# excerpt, TODO confirm.
| addl $1,J | |||
| sall $21,J # J=2^21*sizeof(double)=buffer size(16MB) | |||
| subl $4, J # Don't use last 4 double in the buffer. | |||
| # Now, split M by block J | |||
| subl J,MMM # MMM=MMM-J | |||
# movl leaves the flags alone, so the jge tests the result of the subl above:
# a non-negative MMM jumps to .L00t with M = J.
| movl J,M | |||
| jge .L00t | |||
| @@ -142,15 +138,13 @@ | |||
# Fall-through path stores eax into M; how eax was computed is outside this
# excerpt.
| movl %eax,M | |||
| .L00t: | |||
# Per-pass reload of X, A, N and LDA from the saved copies.
# NOTE(review): X is loaded from XX and later again from STACK_X, and LDA from
# both LDAX and STACK_LDA. This excerpt comes from a diff without +/- markers,
# so presumably old and new variants are both shown. As literally listed, the
# STACK_X load would overwrite the value taken from XX - TODO confirm which
# lines the real file keeps.
| movl XX,%eax | |||
| movl %eax, X | |||
| movl AA,%eax | |||
| movl %eax,A # mov AA to A | |||
| movl NN,%eax | |||
| movl %eax,N # reset N | |||
| movl LDAX, LDA # reset LDA | |||
| movl STACK_X, X | |||
| movl STACK_LDA, LDA | |||
| movl STACK_INCX, INCX | |||
| movl STACK_INCY, INCY | |||
| @@ -605,6 +599,9 @@ | |||
# Tail of one pass: eax = M * SIZE, then AA += eax. SIZE is defined outside
# this excerpt (presumably the element size in bytes).
| movl M,J | |||
| leal (,J,SIZE),%eax | |||
| addl %eax,AA | |||
# eax *= INCX (reloaded from its stack slot), then XX += eax.
| movl STACK_INCX,INCX | |||
| imull INCX,%eax | |||
| addl %eax,XX | |||
# Back to the .L0t label for the next pass.
| jmp .L0t | |||
| ALIGN_4 | |||
| @@ -89,18 +89,23 @@ | |||
| #endif | |||
# STACKSIZE is 16 bytes; the exit path of this file pops four 4-byte
# registers, which presumably is what it accounts for.
| #define STACKSIZE 16 | |||
# NOTE(review): the argument slots M .. BUFFER are listed twice, first relative
# to STACKSIZE and then relative to STACKSIZE+ARGS. This excerpt comes from a
# diff without +/- markers, so presumably one set is the replaced version.
# The prologue in this file does subl $ARGS,%esp before its pushes, which is
# what the +ARGS offsets account for.
| #define M 4 + STACKSIZE(%esp) | |||
| #define N 8 + STACKSIZE(%esp) | |||
| #define ALPHA_R 16 + STACKSIZE(%esp) | |||
| #define ALPHA_I 20 + STACKSIZE(%esp) | |||
| #define A 24 + STACKSIZE(%esp) | |||
| #define STACK_LDA 28 + STACKSIZE(%esp) | |||
| #define STACK_X 32 + STACKSIZE(%esp) | |||
| #define STACK_INCX 36 + STACKSIZE(%esp) | |||
| #define Y 40 + STACKSIZE(%esp) | |||
| #define STACK_INCY 44 + STACKSIZE(%esp) | |||
| #define BUFFER 48 + STACKSIZE(%esp) | |||
| #define ARGS 20 | |||
| #define M 4 + STACKSIZE+ARGS(%esp) | |||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||
| #define ALPHA_R 16 + STACKSIZE+ARGS(%esp) | |||
| #define ALPHA_I 20 + STACKSIZE+ARGS(%esp) | |||
| #define A 24 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_LDA 28 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_X 32 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_INCX 36 + STACKSIZE+ARGS(%esp) | |||
| #define Y 40 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_INCY 44 + STACKSIZE+ARGS(%esp) | |||
| #define BUFFER 48 + STACKSIZE+ARGS(%esp) | |||
# Local slots: running copies of M, Y and A, at ARGS(%esp) and the next two
# 4-byte positions.
| #define MMM 0+ARGS(%esp) | |||
| #define YY 4+ARGS(%esp) | |||
| #define AA 8+ARGS(%esp) | |||
# I and J are aliases for the %eax and %ebx registers.
| #define I %eax | |||
| #define J %ebx | |||
| @@ -123,6 +128,7 @@ | |||
| PROLOGUE | |||
# Reserve ARGS bytes of stack before saving registers; the exit path of this
# file releases them again with addl $ARGS,%esp after its pops.
| subl $ARGS,%esp | |||
# Register saves. The exit path also pops %ebx, whose push is outside this
# excerpt.
| pushl %ebp | |||
| pushl %edi | |||
| pushl %esi | |||
| @@ -130,6 +136,33 @@ | |||
| PROFCODE | |||
# Keep copies of the incoming Y, A and M stack arguments in the local slots
# YY, AA and MMM. Both sides are memory operands, so J is the go-between.
| movl Y,J | |||
| movl J,YY | |||
| movl A,J | |||
| movl J,AA | |||
| movl M,J | |||
| movl J,MMM | |||
# Top of the pass loop; the jmp at the end of the .L999 block returns here.
| .L0t: | |||
# J = 1 << 20, the value stored into M for a full pass.
| xorl J,J | |||
| addl $1,J | |||
| sall $20,J | |||
# MMM -= J, then M = J. movl leaves the flags alone, so the jge tests the
# subtraction: a non-negative MMM means a full pass of J can run.
| subl J,MMM | |||
| movl J,M | |||
| jge .L00t | |||
| ALIGN_3 | |||
# MMM went negative: eax = MMM + J is the amount that was left before the
# subtraction. Leave through .L999x when it is <= 0, otherwise use it as M.
| movl MMM,%eax | |||
| addl J,%eax | |||
| jle .L999x | |||
| movl %eax,M | |||
| .L00t: | |||
# Reload the A and Y argument slots from the running copies AA and YY.
| movl AA,%eax | |||
| movl %eax,A | |||
| movl YY,J | |||
| movl J,Y | |||
# LDA, X and INCX are macros defined outside this excerpt (presumably
# registers).
| movl STACK_LDA, LDA | |||
| movl STACK_X, X | |||
| movl STACK_INCX, INCX | |||
| @@ -595,10 +628,21 @@ | |||
| ALIGN_3 | |||
# End of one pass: advance the running copies and go round again.
| .L999: | |||
# eax = M << ZBASE_SHIFT, then AA += eax. ZBASE_SHIFT is defined outside this
# excerpt (presumably log2 of the element size in bytes).
| movl M,%eax | |||
| sall $ZBASE_SHIFT,%eax | |||
| addl %eax,AA | |||
# eax *= INCY (reloaded from its stack slot), then YY += eax.
| movl STACK_INCY,INCY | |||
| imull INCY,%eax | |||
| addl %eax,YY | |||
| jmp .L0t | |||
| ALIGN_3 | |||
# Exit path, reached from the jle that follows .L0t: pop four saved registers,
# release the ARGS bytes reserved by subl $ARGS,%esp in the prologue, return.
| .L999x: | |||
| popl %ebx | |||
| popl %esi | |||
| popl %edi | |||
| popl %ebp | |||
| addl $ARGS,%esp | |||
| ret | |||
| EPILOGUE | |||
| @@ -76,18 +76,23 @@ | |||
| #endif | |||
# STACKSIZE is 16 bytes; the exit path of this file pops four 4-byte
# registers, which presumably is what it accounts for.
| #define STACKSIZE 16 | |||
| #define ARGS 16 | |||
# NOTE(review): the argument slots M .. BUFFER are listed twice, first relative
# to STACKSIZE+ARGS and then relative to STACKSIZE alone. This excerpt comes
# from a diff without +/- markers, so presumably one set is the replaced
# version. The prologue in this file does subl $ARGS,%esp before its pushes,
# which is what the +ARGS offsets account for.
| #define M 4 + STACKSIZE+ARGS(%esp) | |||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||
| #define ALPHA_R 16 + STACKSIZE+ARGS(%esp) | |||
| #define ALPHA_I 24 + STACKSIZE+ARGS(%esp) | |||
| #define A 32 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_LDA 36 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_X 40 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_INCX 44 + STACKSIZE+ARGS(%esp) | |||
| #define Y 48 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_INCY 52 + STACKSIZE+ARGS(%esp) | |||
| #define BUFFER 56 + STACKSIZE+ARGS(%esp) | |||
# Local slots: running copies of M, Y and A, at ARGS(%esp) and the next two
# 4-byte positions.
| #define MMM 0 + ARGS(%esp) | |||
| #define YY 4 + ARGS(%esp) | |||
| #define AA 8 + ARGS(%esp) | |||
| #define M 4 + STACKSIZE(%esp) | |||
| #define N 8 + STACKSIZE(%esp) | |||
| #define ALPHA_R 16 + STACKSIZE(%esp) | |||
| #define ALPHA_I 24 + STACKSIZE(%esp) | |||
| #define A 32 + STACKSIZE(%esp) | |||
| #define STACK_LDA 36 + STACKSIZE(%esp) | |||
| #define STACK_X 40 + STACKSIZE(%esp) | |||
| #define STACK_INCX 44 + STACKSIZE(%esp) | |||
| #define Y 48 + STACKSIZE(%esp) | |||
| #define STACK_INCY 52 + STACKSIZE(%esp) | |||
| #define BUFFER 56 + STACKSIZE(%esp) | |||
# I and J are aliases for the %eax and %ebx registers.
| #define I %eax | |||
| #define J %ebx | |||
| @@ -110,6 +115,7 @@ | |||
| PROLOGUE | |||
# Reserve ARGS bytes of stack before saving registers; the exit path of this
# file releases them again with addl $ARGS,%esp after its pops.
| subl $ARGS,%esp | |||
# Register saves. The exit path also pops %ebx, whose push is outside this
# excerpt.
| pushl %ebp | |||
| pushl %edi | |||
| pushl %esi | |||
| @@ -117,6 +123,33 @@ | |||
| PROFCODE | |||
# Keep copies of the incoming Y, A and M stack arguments in the local slots
# YY, AA and MMM. Both sides are memory operands, so J is the go-between.
| movl Y,J | |||
| movl J,YY | |||
| movl A,J | |||
| movl J,AA | |||
| movl M,J | |||
| movl J,MMM | |||
# Top of the pass loop; the jmp at the end of the .L999 block returns here.
| .L0t: | |||
# J = 1 << 18, the value stored into M for a full pass.
| xorl J,J | |||
| addl $1,J | |||
| sall $18,J | |||
# MMM -= J, then M = J. movl leaves the flags alone, so the jge tests the
# subtraction: a non-negative MMM means a full pass of J can run.
| subl J,MMM | |||
| movl J,M | |||
| jge .L00t | |||
| ALIGN_3 | |||
# MMM went negative: eax = MMM + J is the amount that was left before the
# subtraction. Leave through .L999x when it is <= 0, otherwise use it as M.
| movl MMM,%eax | |||
| addl J,%eax | |||
| jle .L999x | |||
| movl %eax,M | |||
| .L00t: | |||
# Reload the A and Y argument slots from the running copies AA and YY.
| movl AA,%eax | |||
| movl %eax,A | |||
| movl YY,J | |||
| movl J,Y | |||
# LDA, X and INCX are macros defined outside this excerpt (presumably
# registers).
| movl STACK_LDA, LDA | |||
| movl STACK_X, X | |||
| movl STACK_INCX, INCX | |||
| @@ -458,10 +491,21 @@ | |||
| ALIGN_3 | |||
# End of one pass: advance the running copies and go round again.
| .L999: | |||
# eax = M << ZBASE_SHIFT, then AA += eax. ZBASE_SHIFT is defined outside this
# excerpt (presumably log2 of the element size in bytes).
| movl M,%eax | |||
| sall $ZBASE_SHIFT,%eax | |||
| addl %eax,AA | |||
# eax *= INCY (reloaded from its stack slot), then YY += eax.
| movl STACK_INCY,INCY | |||
| imull INCY,%eax | |||
| addl %eax,YY | |||
| jmp .L0t | |||
| ALIGN_3 | |||
# Exit path, reached from the jle that follows .L0t: pop four saved registers,
# release the ARGS bytes reserved by subl $ARGS,%esp in the prologue, return.
| .L999x: | |||
| popl %ebx | |||
| popl %esi | |||
| popl %edi | |||
| popl %ebp | |||
| addl $ARGS,%esp | |||
| ret | |||
| EPILOGUE | |||
| @@ -89,18 +89,23 @@ | |||
| #endif | |||
# STACKSIZE is 16 bytes; the exit path of this file pops four 4-byte
# registers, which presumably is what it accounts for.
| #define STACKSIZE 16 | |||
# NOTE(review): the argument slots M .. BUFFER are listed twice, first relative
# to STACKSIZE and then relative to STACKSIZE+ARGS. This excerpt comes from a
# diff without +/- markers, so presumably one set is the replaced version.
# The prologue in this file does subl $ARGS,%esp before its pushes, which is
# what the +ARGS offsets account for.
| #define M 4 + STACKSIZE(%esp) | |||
| #define N 8 + STACKSIZE(%esp) | |||
| #define ALPHA_R 16 + STACKSIZE(%esp) | |||
| #define ALPHA_I 20 + STACKSIZE(%esp) | |||
| #define A 24 + STACKSIZE(%esp) | |||
| #define STACK_LDA 28 + STACKSIZE(%esp) | |||
| #define STACK_X 32 + STACKSIZE(%esp) | |||
| #define STACK_INCX 36 + STACKSIZE(%esp) | |||
| #define Y 40 + STACKSIZE(%esp) | |||
| #define STACK_INCY 44 + STACKSIZE(%esp) | |||
| #define BUFFER 48 + STACKSIZE(%esp) | |||
| #define ARGS 20 | |||
| #define M 4 + STACKSIZE+ARGS(%esp) | |||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||
| #define ALPHA_R 16 + STACKSIZE+ARGS(%esp) | |||
| #define ALPHA_I 20 + STACKSIZE+ARGS(%esp) | |||
| #define A 24 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_LDA 28 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_X 32 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_INCX 36 + STACKSIZE+ARGS(%esp) | |||
| #define Y 40 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_INCY 44 + STACKSIZE+ARGS(%esp) | |||
| #define BUFFER 48 + STACKSIZE+ARGS(%esp) | |||
# Local slots: running copies of M, X and A, at ARGS(%esp) and the next two
# 4-byte positions.
| #define MMM 0+ARGS(%esp) | |||
| #define XX 4+ARGS(%esp) | |||
| #define AA 8+ARGS(%esp) | |||
# I and J are aliases for the %eax and %ebx registers.
| #define I %eax | |||
| #define J %ebx | |||
| @@ -123,6 +128,7 @@ | |||
| PROLOGUE | |||
# Reserve ARGS bytes of stack before saving registers; the exit path of this
# file releases them again with addl $ARGS,%esp after its pops.
| subl $ARGS,%esp | |||
# Register saves. The exit path also pops %ebx, whose push is outside this
# excerpt.
| pushl %ebp | |||
| pushl %edi | |||
| pushl %esi | |||
| @@ -130,8 +136,35 @@ | |||
| PROFCODE | |||
# Load LDA and X from their stack slots and keep copies of X, A and M in the
# local slots XX, AA and MMM. X is stored directly (its macro is outside this
# excerpt, presumably a register); A and M go through J.
| movl STACK_LDA, LDA | |||
| movl STACK_X, X | |||
| movl X,XX | |||
| movl A,J | |||
| movl J,AA #backup A | |||
| movl M,J | |||
| movl J,MMM | |||
# Top of the pass loop; the jmp at the end of the .L999 block returns here.
| .L0t: | |||
# J = (1 << 20) - 8, the value stored into M for a full pass.
| xorl J,J | |||
| addl $1,J | |||
| sall $20,J | |||
| subl $8,J | |||
| subl J,MMM #MMM-=J | |||
# movl leaves the flags alone, so the jge tests the subtraction above: a
# non-negative MMM means a full pass of J can run.
| movl J,M | |||
| jge .L00t | |||
| ALIGN_4 | |||
# MMM went negative: eax = MMM + J is the amount that was left before the
# subtraction. Leave through .L999x when it is <= 0, otherwise use it as M.
| movl MMM,%eax | |||
| addl J,%eax | |||
| jle .L999x | |||
| movl %eax,M | |||
| .L00t: | |||
# Reload the A slot and X from the running copies AA and XX.
| movl AA,%eax | |||
| movl %eax,A | |||
| movl XX,%eax | |||
| movl %eax,X | |||
# LDA, INCX and INCY are macros defined outside this excerpt (presumably
# registers).
| movl STACK_LDA,LDA | |||
| movl STACK_INCX, INCX | |||
| movl STACK_INCY, INCY | |||
| @@ -513,10 +546,22 @@ | |||
| ALIGN_4 | |||
# End of one pass: advance the running copies and go round again.
| .L999: | |||
# eax = M << ZBASE_SHIFT, then AA += eax. ZBASE_SHIFT is defined outside this
# excerpt (presumably log2 of the element size in bytes).
| movl M,%eax | |||
| sall $ZBASE_SHIFT, %eax | |||
| addl %eax,AA | |||
# eax *= INCX (reloaded from its stack slot), then XX += eax.
| movl STACK_INCX,INCX | |||
| imull INCX,%eax | |||
| addl %eax,XX | |||
| jmp .L0t | |||
| ALIGN_4 | |||
# Exit path, reached from the jle that follows .L0t: pop four saved registers,
# release the ARGS bytes reserved by subl $ARGS,%esp in the prologue, return.
| .L999x: | |||
| popl %ebx | |||
| popl %esi | |||
| popl %edi | |||
| popl %ebp | |||
| addl $ARGS,%esp | |||
| ret | |||
| EPILOGUE | |||
| @@ -76,19 +76,24 @@ | |||
| #endif | |||
# STACKSIZE is 16 bytes; the exit path of this file pops four 4-byte
# registers, which presumably is what it accounts for.
| #define STACKSIZE 16 | |||
| #define ARGS 20 | |||
# NOTE(review): the argument slots M .. BUFFER are listed twice, first relative
# to STACKSIZE+ARGS and then relative to STACKSIZE alone. This excerpt comes
# from a diff without +/- markers, so presumably one set is the replaced
# version. The prologue in this file does subl $ARGS,%esp before its pushes,
# which is what the +ARGS offsets account for.
| #define M 4 + STACKSIZE+ARGS(%esp) | |||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||
| #define ALPHA_R 16 + STACKSIZE+ARGS(%esp) | |||
| #define ALPHA_I 24 + STACKSIZE+ARGS(%esp) | |||
| #define A 32 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_LDA 36 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_X 40 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_INCX 44 + STACKSIZE+ARGS(%esp) | |||
| #define Y 48 + STACKSIZE+ARGS(%esp) | |||
| #define STACK_INCY 52 + STACKSIZE+ARGS(%esp) | |||
| #define BUFFER 56 + STACKSIZE+ARGS(%esp) | |||
# Local slots: running copies of M, A and X, at ARGS(%esp) and the next two
# 4-byte positions.
| #define MMM 0 + ARGS(%esp) | |||
| #define AA 4 + ARGS(%esp) | |||
| #define XX 8 + ARGS(%esp) | |||
| #define M 4 + STACKSIZE(%esp) | |||
| #define N 8 + STACKSIZE(%esp) | |||
| #define ALPHA_R 16 + STACKSIZE(%esp) | |||
| #define ALPHA_I 24 + STACKSIZE(%esp) | |||
| #define A 32 + STACKSIZE(%esp) | |||
| #define STACK_LDA 36 + STACKSIZE(%esp) | |||
| #define STACK_X 40 + STACKSIZE(%esp) | |||
| #define STACK_INCX 44 + STACKSIZE(%esp) | |||
| #define Y 48 + STACKSIZE(%esp) | |||
| #define STACK_INCY 52 + STACKSIZE(%esp) | |||
| #define BUFFER 56 + STACKSIZE(%esp) | |||
# I and J are aliases for the %eax and %ebx registers.
| #define I %eax | |||
| #define J %ebx | |||
| @@ -110,6 +115,7 @@ | |||
| PROLOGUE | |||
# Reserve ARGS bytes of stack before saving registers; the exit path of this
# file releases them again with addl $ARGS,%esp after its pops.
| subl $ARGS,%esp | |||
# Register saves. The exit path also pops %ebx, whose push is outside this
# excerpt.
| pushl %ebp | |||
| pushl %edi | |||
| pushl %esi | |||
| @@ -117,8 +123,35 @@ | |||
| PROFCODE | |||
# Load X from its stack slot and keep copies of X, A and M in the local slots
# XX, AA and MMM. X is stored directly (its macro is outside this excerpt,
# presumably a register); A and M go through J.
| movl STACK_X, X | |||
| movl X, XX | |||
| movl A,J | |||
| movl J,AA | |||
| movl M,J | |||
| movl J,MMM | |||
# Top of the pass loop; the jmp at the end of the .L999 block returns here.
| .L0t: | |||
# J = (1 << 18) - 4, the value stored into M for a full pass.
| xorl J,J | |||
| addl $1,J | |||
| sall $18,J | |||
| subl $4,J | |||
# MMM -= J, then M = J. movl leaves the flags alone, so the jge tests the
# subtraction: a non-negative MMM means a full pass of J can run.
| subl J,MMM | |||
| movl J,M | |||
| jge .L00t | |||
| ALIGN_4 | |||
# MMM went negative: eax = MMM + J is the amount that was left before the
# subtraction. Leave through .L999x when it is <= 0, otherwise use it as M.
| movl MMM,%eax | |||
| addl J,%eax | |||
| jle .L999x | |||
| movl %eax, M | |||
| .L00t: | |||
# Reload X and the A slot from the running copies XX and AA.
| movl XX, %eax | |||
| movl %eax, X | |||
| movl AA,%eax | |||
| movl %eax,A | |||
| movl STACK_LDA, LDA | |||
# NOTE(review): as literally listed, this load from STACK_X overwrites the X
# just taken from XX. This excerpt comes from a diff without +/- markers, so
# presumably this is a removed line - TODO confirm against the real file.
| movl STACK_X, X | |||
| movl STACK_INCX, INCX | |||
| movl STACK_INCY, INCY | |||
| @@ -188,7 +221,7 @@ | |||
# Y1 = Y and J = N ahead of the .L11 label. Y1 is a macro defined outside
# this excerpt.
| movl Y, Y1 | |||
| movl N, J | |||
# NOTE(review): two alignment macros back to back. This excerpt comes from a
# diff without +/- markers, so presumably one replaces the other - TODO
# confirm.
| ALIGN_3 | |||
| ALIGN_4 | |||
| .L11: | |||
# X is loaded from the BUFFER argument slot.
| movl BUFFER, X | |||
| @@ -395,10 +428,21 @@ | |||
| ALIGN_4 | |||
# End of one pass: advance the running copies and go round again.
| .L999: | |||
# eax = M << ZBASE_SHIFT, then AA += eax. ZBASE_SHIFT is defined outside this
# excerpt (presumably log2 of the element size in bytes).
| movl M,%eax | |||
| sall $ZBASE_SHIFT,%eax | |||
| addl %eax,AA | |||
# eax *= INCX (reloaded from its stack slot), then XX += eax.
| movl STACK_INCX,INCX | |||
| imull INCX,%eax | |||
| addl %eax,XX | |||
| jmp .L0t | |||
| ALIGN_4 | |||
# Exit path, reached from the jle that follows .L0t: pop four saved registers,
# release the ARGS bytes reserved by subl $ARGS,%esp in the prologue, return.
| .L999x: | |||
| popl %ebx | |||
| popl %esi | |||
| popl %edi | |||
| popl %ebp | |||
| addl $ARGS,%esp | |||
| ret | |||
| EPILOGUE | |||