| @@ -101,10 +101,10 @@ | |||||
| #define Y 36 + STACKSIZE+ARGS(%esp) | #define Y 36 + STACKSIZE+ARGS(%esp) | ||||
| #define STACK_INCY 40 + STACKSIZE+ARGS(%esp) | #define STACK_INCY 40 + STACKSIZE+ARGS(%esp) | ||||
| #define BUFFER 44 + STACKSIZE+ARGS(%esp) | #define BUFFER 44 + STACKSIZE+ARGS(%esp) | ||||
| #define MMM 0+ARGS(%esp) | #define MMM 0+ARGS(%esp) | ||||
| #define YY 4+ARGS(%esp) | #define YY 4+ARGS(%esp) | ||||
| #define AA 8+ARGS(%esp) | #define AA 8+ARGS(%esp) | ||||
| #define LDAX 12+ARGS(%esp) | |||||
| #define I %eax | #define I %eax | ||||
| #define J %ebx | #define J %ebx | ||||
| @@ -153,8 +153,8 @@ | |||||
| movl YY,J | movl YY,J | ||||
| movl J,Y | movl J,Y | ||||
| movl STACK_LDA, LDA | |||||
| movl STACK_LDA, LDA | |||||
| movl STACK_X, X | movl STACK_X, X | ||||
| movl STACK_INCX, INCX | movl STACK_INCX, INCX | ||||
| @@ -688,9 +688,9 @@ | |||||
| movl M,J | movl M,J | ||||
| leal (,J,SIZE),%eax | leal (,J,SIZE),%eax | ||||
| addl %eax,AA | addl %eax,AA | ||||
| movl YY,J | |||||
| addl %eax,J | |||||
| movl J,YY | |||||
| movl STACK_INCY,INCY | |||||
| imull INCY,%eax | |||||
| addl %eax,YY | |||||
| jmp .L0t | jmp .L0t | ||||
| ALIGN_4 | ALIGN_4 | ||||
| @@ -714,9 +714,9 @@ | |||||
| movl M,J | movl M,J | ||||
| leal (,J,SIZE),%eax | leal (,J,SIZE),%eax | ||||
| addl %eax,AA | addl %eax,AA | ||||
| movl YY,J | |||||
| addl %eax,J | |||||
| movl J,YY | |||||
| movl STACK_INCY,INCY | |||||
| imull INCY,%eax | |||||
| addl %eax,YY | |||||
| jmp .L0t | jmp .L0t | ||||
| ALIGN_4 | ALIGN_4 | ||||
| @@ -102,11 +102,9 @@ | |||||
| #define STACK_INCY 40 + STACKSIZE+ARGS(%esp) | #define STACK_INCY 40 + STACKSIZE+ARGS(%esp) | ||||
| #define BUFFER 44 + STACKSIZE+ARGS(%esp) | #define BUFFER 44 + STACKSIZE+ARGS(%esp) | ||||
| #define MMM 0+STACKSIZE(%esp) | |||||
| #define NN 4+STACKSIZE(%esp) | |||||
| #define AA 8+STACKSIZE(%esp) | |||||
| #define LDAX 12+STACKSIZE(%esp) | |||||
| #define XX 16+STACKSIZE(%esp) | |||||
| #define MMM 0+ARGS(%esp) | |||||
| #define AA 4+ARGS(%esp) | |||||
| #define XX 8+ARGS(%esp) | |||||
| #define I %eax | #define I %eax | ||||
| #define J %ebx | #define J %ebx | ||||
| @@ -129,12 +127,8 @@ | |||||
| PROFCODE | PROFCODE | ||||
| movl STACK_LDA, LDA | |||||
| movl LDA,LDAX # backup LDA | |||||
| movl STACK_X, X | movl STACK_X, X | ||||
| movl X,XX | movl X,XX | ||||
| movl N,J | |||||
| movl J,NN # backup N | |||||
| movl A,J | movl A,J | ||||
| movl J,AA # backup A | movl J,AA # backup A | ||||
| movl M,J | movl M,J | ||||
| @@ -144,7 +138,6 @@ | |||||
| addl $1,J | addl $1,J | ||||
| sall $22,J # J=2^22*sizeof(float)=buffer size(16MB) | sall $22,J # J=2^22*sizeof(float)=buffer size(16MB) | ||||
| subl $8, J # Don't use the last 8 floats in the buffer. | subl $8, J # Don't use the last 8 floats in the buffer. | ||||
| # Now, split M by block J | |||||
| subl J,MMM # MMM=MMM-J | subl J,MMM # MMM=MMM-J | ||||
| movl J,M | movl J,M | ||||
| jge .L00t | jge .L00t | ||||
| @@ -159,13 +152,10 @@ | |||||
| movl AA,%eax | movl AA,%eax | ||||
| movl %eax,A # mov AA to A | movl %eax,A # mov AA to A | ||||
| movl NN,%eax | |||||
| movl %eax,N # reset N | |||||
| movl LDAX, LDA # reset LDA | |||||
| movl XX,X | |||||
| movl XX,%eax | |||||
| movl %eax,X | |||||
| movl STACK_LDA, LDA | |||||
| movl STACK_INCX, INCX | movl STACK_INCX, INCX | ||||
| movl STACK_INCY, INCY | movl STACK_INCY, INCY | ||||
| @@ -688,9 +678,9 @@ | |||||
| movl M,J | movl M,J | ||||
| leal (,J,SIZE),%eax | leal (,J,SIZE),%eax | ||||
| addl %eax,AA | addl %eax,AA | ||||
| movl XX,J | |||||
| addl %eax,J | |||||
| movl J,XX | |||||
| movl STACK_INCX,INCX | |||||
| imull INCX,%eax | |||||
| addl %eax,XX | |||||
| jmp .L0t | jmp .L0t | ||||
| ALIGN_4 | ALIGN_4 | ||||
| @@ -76,7 +76,7 @@ | |||||
| #endif | #endif | ||||
| #define STACKSIZE 16 | #define STACKSIZE 16 | ||||
| #define ARGS 16 | |||||
| #define ARGS 20 | |||||
| #define M 4 + STACKSIZE+ARGS(%esp) | #define M 4 + STACKSIZE+ARGS(%esp) | ||||
| #define N 8 + STACKSIZE+ARGS(%esp) | #define N 8 + STACKSIZE+ARGS(%esp) | ||||
| @@ -89,10 +89,9 @@ | |||||
| #define STACK_INCY 44 + STACKSIZE+ARGS(%esp) | #define STACK_INCY 44 + STACKSIZE+ARGS(%esp) | ||||
| #define BUFFER 48 + STACKSIZE+ARGS(%esp) | #define BUFFER 48 + STACKSIZE+ARGS(%esp) | ||||
| #define MMM 0+STACKSIZE(%esp) | |||||
| #define AA 4+STACKSIZE(%esp) | |||||
| #define LDAX 8+STACKSIZE(%esp) | |||||
| #define NN 12+STACKSIZE(%esp) | |||||
| #define MMM 0+ARGS(%esp) | |||||
| #define AA 4+ARGS(%esp) | |||||
| #define XX 8+ARGS(%esp) | |||||
| #define I %eax | #define I %eax | ||||
| #define J %ebx | #define J %ebx | ||||
| @@ -117,10 +116,8 @@ | |||||
| PROFCODE | PROFCODE | ||||
| movl STACK_LDA, LDA | |||||
| movl LDA,LDAX # backup LDA | |||||
| movl N,J | |||||
| movl J,NN # backup N | |||||
| movl STACK_X, X | |||||
| movl X,XX | |||||
| movl A,J | movl A,J | ||||
| movl J,AA # backup A | movl J,AA # backup A | ||||
| movl M,J | movl M,J | ||||
| @@ -130,7 +127,6 @@ | |||||
| addl $1,J | addl $1,J | ||||
| sall $21,J # J=2^21*sizeof(double)=buffer size(16MB) | sall $21,J # J=2^21*sizeof(double)=buffer size(16MB) | ||||
| subl $4, J # Don't use the last 4 doubles in the buffer. | subl $4, J # Don't use the last 4 doubles in the buffer. | ||||
| # Now, split M by block J | |||||
| subl J,MMM # MMM=MMM-J | subl J,MMM # MMM=MMM-J | ||||
| movl J,M | movl J,M | ||||
| jge .L00t | jge .L00t | ||||
| @@ -142,15 +138,13 @@ | |||||
| movl %eax,M | movl %eax,M | ||||
| .L00t: | .L00t: | ||||
| movl XX,%eax | |||||
| movl %eax, X | |||||
| movl AA,%eax | movl AA,%eax | ||||
| movl %eax,A # mov AA to A | movl %eax,A # mov AA to A | ||||
| movl NN,%eax | |||||
| movl %eax,N # reset N | |||||
| movl LDAX, LDA # reset LDA | |||||
| movl STACK_X, X | |||||
| movl STACK_LDA, LDA | |||||
| movl STACK_INCX, INCX | movl STACK_INCX, INCX | ||||
| movl STACK_INCY, INCY | movl STACK_INCY, INCY | ||||
| @@ -605,6 +599,9 @@ | |||||
| movl M,J | movl M,J | ||||
| leal (,J,SIZE),%eax | leal (,J,SIZE),%eax | ||||
| addl %eax,AA | addl %eax,AA | ||||
| movl STACK_INCX,INCX | |||||
| imull INCX,%eax | |||||
| addl %eax,XX | |||||
| jmp .L0t | jmp .L0t | ||||
| ALIGN_4 | ALIGN_4 | ||||
| @@ -89,18 +89,23 @@ | |||||
| #endif | #endif | ||||
| #define STACKSIZE 16 | #define STACKSIZE 16 | ||||
| #define M 4 + STACKSIZE(%esp) | |||||
| #define N 8 + STACKSIZE(%esp) | |||||
| #define ALPHA_R 16 + STACKSIZE(%esp) | |||||
| #define ALPHA_I 20 + STACKSIZE(%esp) | |||||
| #define A 24 + STACKSIZE(%esp) | |||||
| #define STACK_LDA 28 + STACKSIZE(%esp) | |||||
| #define STACK_X 32 + STACKSIZE(%esp) | |||||
| #define STACK_INCX 36 + STACKSIZE(%esp) | |||||
| #define Y 40 + STACKSIZE(%esp) | |||||
| #define STACK_INCY 44 + STACKSIZE(%esp) | |||||
| #define BUFFER 48 + STACKSIZE(%esp) | |||||
| #define ARGS 20 | |||||
| #define M 4 + STACKSIZE+ARGS(%esp) | |||||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||||
| #define ALPHA_R 16 + STACKSIZE+ARGS(%esp) | |||||
| #define ALPHA_I 20 + STACKSIZE+ARGS(%esp) | |||||
| #define A 24 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_LDA 28 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_X 32 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCX 36 + STACKSIZE+ARGS(%esp) | |||||
| #define Y 40 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCY 44 + STACKSIZE+ARGS(%esp) | |||||
| #define BUFFER 48 + STACKSIZE+ARGS(%esp) | |||||
| #define MMM 0+ARGS(%esp) | |||||
| #define YY 4+ARGS(%esp) | |||||
| #define AA 8+ARGS(%esp) | |||||
| #define I %eax | #define I %eax | ||||
| #define J %ebx | #define J %ebx | ||||
| @@ -123,6 +128,7 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| subl $ARGS,%esp | |||||
| pushl %ebp | pushl %ebp | ||||
| pushl %edi | pushl %edi | ||||
| pushl %esi | pushl %esi | ||||
| @@ -130,6 +136,33 @@ | |||||
| PROFCODE | PROFCODE | ||||
| movl Y,J | |||||
| movl J,YY | |||||
| movl A,J | |||||
| movl J,AA | |||||
| movl M,J | |||||
| movl J,MMM | |||||
| .L0t: | |||||
| xorl J,J | |||||
| addl $1,J | |||||
| sall $20,J | |||||
| subl J,MMM | |||||
| movl J,M | |||||
| jge .L00t | |||||
| ALIGN_3 | |||||
| movl MMM,%eax | |||||
| addl J,%eax | |||||
| jle .L999x | |||||
| movl %eax,M | |||||
| .L00t: | |||||
| movl AA,%eax | |||||
| movl %eax,A | |||||
| movl YY,J | |||||
| movl J,Y | |||||
| movl STACK_LDA, LDA | movl STACK_LDA, LDA | ||||
| movl STACK_X, X | movl STACK_X, X | ||||
| movl STACK_INCX, INCX | movl STACK_INCX, INCX | ||||
| @@ -595,10 +628,21 @@ | |||||
| ALIGN_3 | ALIGN_3 | ||||
| .L999: | .L999: | ||||
| movl M,%eax | |||||
| sall $ZBASE_SHIFT,%eax | |||||
| addl %eax,AA | |||||
| movl STACK_INCY,INCY | |||||
| imull INCY,%eax | |||||
| addl %eax,YY | |||||
| jmp .L0t | |||||
| ALIGN_3 | |||||
| .L999x: | |||||
| popl %ebx | popl %ebx | ||||
| popl %esi | popl %esi | ||||
| popl %edi | popl %edi | ||||
| popl %ebp | popl %ebp | ||||
| addl $ARGS,%esp | |||||
| ret | ret | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -76,18 +76,23 @@ | |||||
| #endif | #endif | ||||
| #define STACKSIZE 16 | #define STACKSIZE 16 | ||||
| #define ARGS 16 | |||||
| #define M 4 + STACKSIZE+ARGS(%esp) | |||||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||||
| #define ALPHA_R 16 + STACKSIZE+ARGS(%esp) | |||||
| #define ALPHA_I 24 + STACKSIZE+ARGS(%esp) | |||||
| #define A 32 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_LDA 36 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_X 40 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCX 44 + STACKSIZE+ARGS(%esp) | |||||
| #define Y 48 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCY 52 + STACKSIZE+ARGS(%esp) | |||||
| #define BUFFER 56 + STACKSIZE+ARGS(%esp) | |||||
| #define MMM 0 + ARGS(%esp) | |||||
| #define YY 4 + ARGS(%esp) | |||||
| #define AA 8 + ARGS(%esp) | |||||
| #define M 4 + STACKSIZE(%esp) | |||||
| #define N 8 + STACKSIZE(%esp) | |||||
| #define ALPHA_R 16 + STACKSIZE(%esp) | |||||
| #define ALPHA_I 24 + STACKSIZE(%esp) | |||||
| #define A 32 + STACKSIZE(%esp) | |||||
| #define STACK_LDA 36 + STACKSIZE(%esp) | |||||
| #define STACK_X 40 + STACKSIZE(%esp) | |||||
| #define STACK_INCX 44 + STACKSIZE(%esp) | |||||
| #define Y 48 + STACKSIZE(%esp) | |||||
| #define STACK_INCY 52 + STACKSIZE(%esp) | |||||
| #define BUFFER 56 + STACKSIZE(%esp) | |||||
| #define I %eax | #define I %eax | ||||
| #define J %ebx | #define J %ebx | ||||
| @@ -110,6 +115,7 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| subl $ARGS,%esp | |||||
| pushl %ebp | pushl %ebp | ||||
| pushl %edi | pushl %edi | ||||
| pushl %esi | pushl %esi | ||||
| @@ -117,6 +123,33 @@ | |||||
| PROFCODE | PROFCODE | ||||
| movl Y,J | |||||
| movl J,YY | |||||
| movl A,J | |||||
| movl J,AA | |||||
| movl M,J | |||||
| movl J,MMM | |||||
| .L0t: | |||||
| xorl J,J | |||||
| addl $1,J | |||||
| sall $18,J | |||||
| subl J,MMM | |||||
| movl J,M | |||||
| jge .L00t | |||||
| ALIGN_3 | |||||
| movl MMM,%eax | |||||
| addl J,%eax | |||||
| jle .L999x | |||||
| movl %eax,M | |||||
| .L00t: | |||||
| movl AA,%eax | |||||
| movl %eax,A | |||||
| movl YY,J | |||||
| movl J,Y | |||||
| movl STACK_LDA, LDA | movl STACK_LDA, LDA | ||||
| movl STACK_X, X | movl STACK_X, X | ||||
| movl STACK_INCX, INCX | movl STACK_INCX, INCX | ||||
| @@ -458,10 +491,21 @@ | |||||
| ALIGN_3 | ALIGN_3 | ||||
| .L999: | .L999: | ||||
| movl M,%eax | |||||
| sall $ZBASE_SHIFT,%eax | |||||
| addl %eax,AA | |||||
| movl STACK_INCY,INCY | |||||
| imull INCY,%eax | |||||
| addl %eax,YY | |||||
| jmp .L0t | |||||
| ALIGN_3 | |||||
| .L999x: | |||||
| popl %ebx | popl %ebx | ||||
| popl %esi | popl %esi | ||||
| popl %edi | popl %edi | ||||
| popl %ebp | popl %ebp | ||||
| addl $ARGS,%esp | |||||
| ret | ret | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -89,18 +89,23 @@ | |||||
| #endif | #endif | ||||
| #define STACKSIZE 16 | #define STACKSIZE 16 | ||||
| #define M 4 + STACKSIZE(%esp) | |||||
| #define N 8 + STACKSIZE(%esp) | |||||
| #define ALPHA_R 16 + STACKSIZE(%esp) | |||||
| #define ALPHA_I 20 + STACKSIZE(%esp) | |||||
| #define A 24 + STACKSIZE(%esp) | |||||
| #define STACK_LDA 28 + STACKSIZE(%esp) | |||||
| #define STACK_X 32 + STACKSIZE(%esp) | |||||
| #define STACK_INCX 36 + STACKSIZE(%esp) | |||||
| #define Y 40 + STACKSIZE(%esp) | |||||
| #define STACK_INCY 44 + STACKSIZE(%esp) | |||||
| #define BUFFER 48 + STACKSIZE(%esp) | |||||
| #define ARGS 20 | |||||
| #define M 4 + STACKSIZE+ARGS(%esp) | |||||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||||
| #define ALPHA_R 16 + STACKSIZE+ARGS(%esp) | |||||
| #define ALPHA_I 20 + STACKSIZE+ARGS(%esp) | |||||
| #define A 24 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_LDA 28 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_X 32 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCX 36 + STACKSIZE+ARGS(%esp) | |||||
| #define Y 40 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCY 44 + STACKSIZE+ARGS(%esp) | |||||
| #define BUFFER 48 + STACKSIZE+ARGS(%esp) | |||||
| #define MMM 0+ARGS(%esp) | |||||
| #define XX 4+ARGS(%esp) | |||||
| #define AA 8+ARGS(%esp) | |||||
| #define I %eax | #define I %eax | ||||
| #define J %ebx | #define J %ebx | ||||
| @@ -123,6 +128,7 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| subl $ARGS,%esp | |||||
| pushl %ebp | pushl %ebp | ||||
| pushl %edi | pushl %edi | ||||
| pushl %esi | pushl %esi | ||||
| @@ -130,8 +136,35 @@ | |||||
| PROFCODE | PROFCODE | ||||
| movl STACK_LDA, LDA | |||||
| movl STACK_X, X | movl STACK_X, X | ||||
| movl X,XX | |||||
| movl A,J | |||||
| movl J,AA #backup A | |||||
| movl M,J | |||||
| movl J,MMM | |||||
| .L0t: | |||||
| xorl J,J | |||||
| addl $1,J | |||||
| sall $20,J | |||||
| subl $8,J | |||||
| subl J,MMM #MMM-=J | |||||
| movl J,M | |||||
| jge .L00t | |||||
| ALIGN_4 | |||||
| movl MMM,%eax | |||||
| addl J,%eax | |||||
| jle .L999x | |||||
| movl %eax,M | |||||
| .L00t: | |||||
| movl AA,%eax | |||||
| movl %eax,A | |||||
| movl XX,%eax | |||||
| movl %eax,X | |||||
| movl STACK_LDA,LDA | |||||
| movl STACK_INCX, INCX | movl STACK_INCX, INCX | ||||
| movl STACK_INCY, INCY | movl STACK_INCY, INCY | ||||
| @@ -513,10 +546,22 @@ | |||||
| ALIGN_4 | ALIGN_4 | ||||
| .L999: | .L999: | ||||
| movl M,%eax | |||||
| sall $ZBASE_SHIFT, %eax | |||||
| addl %eax,AA | |||||
| movl STACK_INCX,INCX | |||||
| imull INCX,%eax | |||||
| addl %eax,XX | |||||
| jmp .L0t | |||||
| ALIGN_4 | |||||
| .L999x: | |||||
| popl %ebx | popl %ebx | ||||
| popl %esi | popl %esi | ||||
| popl %edi | popl %edi | ||||
| popl %ebp | popl %ebp | ||||
| addl $ARGS,%esp | |||||
| ret | ret | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -76,19 +76,24 @@ | |||||
| #endif | #endif | ||||
| #define STACKSIZE 16 | #define STACKSIZE 16 | ||||
| #define ARGS 20 | |||||
| #define M 4 + STACKSIZE+ARGS(%esp) | |||||
| #define N 8 + STACKSIZE+ARGS(%esp) | |||||
| #define ALPHA_R 16 + STACKSIZE+ARGS(%esp) | |||||
| #define ALPHA_I 24 + STACKSIZE+ARGS(%esp) | |||||
| #define A 32 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_LDA 36 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_X 40 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCX 44 + STACKSIZE+ARGS(%esp) | |||||
| #define Y 48 + STACKSIZE+ARGS(%esp) | |||||
| #define STACK_INCY 52 + STACKSIZE+ARGS(%esp) | |||||
| #define BUFFER 56 + STACKSIZE+ARGS(%esp) | |||||
| #define MMM 0 + ARGS(%esp) | |||||
| #define AA 4 + ARGS(%esp) | |||||
| #define XX 8 + ARGS(%esp) | |||||
| #define M 4 + STACKSIZE(%esp) | |||||
| #define N 8 + STACKSIZE(%esp) | |||||
| #define ALPHA_R 16 + STACKSIZE(%esp) | |||||
| #define ALPHA_I 24 + STACKSIZE(%esp) | |||||
| #define A 32 + STACKSIZE(%esp) | |||||
| #define STACK_LDA 36 + STACKSIZE(%esp) | |||||
| #define STACK_X 40 + STACKSIZE(%esp) | |||||
| #define STACK_INCX 44 + STACKSIZE(%esp) | |||||
| #define Y 48 + STACKSIZE(%esp) | |||||
| #define STACK_INCY 52 + STACKSIZE(%esp) | |||||
| #define BUFFER 56 + STACKSIZE(%esp) | |||||
| #define I %eax | #define I %eax | ||||
| #define J %ebx | #define J %ebx | ||||
| @@ -110,6 +115,7 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| subl $ARGS,%esp | |||||
| pushl %ebp | pushl %ebp | ||||
| pushl %edi | pushl %edi | ||||
| pushl %esi | pushl %esi | ||||
| @@ -117,8 +123,35 @@ | |||||
| PROFCODE | PROFCODE | ||||
| movl STACK_X, X | |||||
| movl X, XX | |||||
| movl A,J | |||||
| movl J,AA | |||||
| movl M,J | |||||
| movl J,MMM | |||||
| .L0t: | |||||
| xorl J,J | |||||
| addl $1,J | |||||
| sall $18,J | |||||
| subl $4,J | |||||
| subl J,MMM | |||||
| movl J,M | |||||
| jge .L00t | |||||
| ALIGN_4 | |||||
| movl MMM,%eax | |||||
| addl J,%eax | |||||
| jle .L999x | |||||
| movl %eax, M | |||||
| .L00t: | |||||
| movl XX, %eax | |||||
| movl %eax, X | |||||
| movl AA,%eax | |||||
| movl %eax,A | |||||
| movl STACK_LDA, LDA | movl STACK_LDA, LDA | ||||
| movl STACK_X, X | |||||
| movl STACK_INCX, INCX | movl STACK_INCX, INCX | ||||
| movl STACK_INCY, INCY | movl STACK_INCY, INCY | ||||
| @@ -188,7 +221,7 @@ | |||||
| movl Y, Y1 | movl Y, Y1 | ||||
| movl N, J | movl N, J | ||||
| ALIGN_3 | |||||
| ALIGN_4 | |||||
| .L11: | .L11: | ||||
| movl BUFFER, X | movl BUFFER, X | ||||
| @@ -395,10 +428,21 @@ | |||||
| ALIGN_4 | ALIGN_4 | ||||
| .L999: | .L999: | ||||
| movl M,%eax | |||||
| sall $ZBASE_SHIFT,%eax | |||||
| addl %eax,AA | |||||
| movl STACK_INCX,INCX | |||||
| imull INCX,%eax | |||||
| addl %eax,XX | |||||
| jmp .L0t | |||||
| ALIGN_4 | |||||
| .L999x: | |||||
| popl %ebx | popl %ebx | ||||
| popl %esi | popl %esi | ||||
| popl %edi | popl %edi | ||||
| popl %ebp | popl %ebp | ||||
| addl $ARGS,%esp | |||||
| ret | ret | ||||
| EPILOGUE | EPILOGUE | ||||