| @@ -3,7 +3,7 @@ | |||||
| #include "common.h" | #include "common.h" | ||||
| #define FETCH ld | #define FETCH ld | ||||
| #define STACKSIZE 192 | |||||
| #define STACKSIZE 160 | |||||
| #define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) | #define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) | ||||
| #define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) | #define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) | ||||
| @@ -127,7 +127,7 @@ | |||||
| # .ent gemm | # .ent gemm | ||||
| # .type gemm, @function | # .type gemm, @function | ||||
| #gemm: | #gemm: | ||||
| # .frame $fp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0 | |||||
| # .frame $sp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0 | |||||
| # .mask 0x40000000,-8 | # .mask 0x40000000,-8 | ||||
| # .fmask 0x00000000,0 | # .fmask 0x00000000,0 | ||||
| # .set noreorder | # .set noreorder | ||||
| @@ -137,34 +137,34 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| daddiu $sp,$sp,-STACKSIZE | daddiu $sp,$sp,-STACKSIZE | ||||
| sd $fp,184($sp) | |||||
| move $fp,$sp | |||||
| sd $16, 0($fp) | |||||
| sd $17, 8($fp) | |||||
| sd $18, 16($fp) | |||||
| sd $19, 24($fp) | |||||
| sd $20, 32($fp) | |||||
| sd $21, 40($fp) | |||||
| sd $22, 48($fp) | |||||
| ST $f24, 56($fp) | |||||
| ST $f25, 64($fp) | |||||
| ST $f26, 72($fp) | |||||
| ST $f27, 80($fp) | |||||
| ST $f28, 88($fp) | |||||
| sd $16, 0($sp) | |||||
| sd $17, 8($sp) | |||||
| sd $18, 16($sp) | |||||
| sd $19, 24($sp) | |||||
| sd $20, 32($sp) | |||||
| sd $21, 40($sp) | |||||
| sd $22, 48($sp) | |||||
| ST $f24, 56($sp) | |||||
| ST $f25, 64($sp) | |||||
| ST $f26, 72($sp) | |||||
| ST $f27, 80($sp) | |||||
| ST $f28, 88($sp) | |||||
| #if defined(TRMMKERNEL) | #if defined(TRMMKERNEL) | ||||
| sd $23, 96($fp) | |||||
| sd $24, 104($fp) | |||||
| sd $25, 112($fp) | |||||
| sd $23, 96($sp) | |||||
| sd $24, 104($sp) | |||||
| sd $25, 112($sp) | |||||
| LDARG OFFSET, 160($sp) | |||||
| #endif | #endif | ||||
| #ifndef __64BIT__ | #ifndef __64BIT__ | ||||
| ST $f20,120($fp) | |||||
| ST $f21,128($fp) | |||||
| ST $f22,136($fp) | |||||
| ST $f23,144($fp) | |||||
| ST $f20,120($sp) | |||||
| ST $f21,128($sp) | |||||
| ST $f22,136($sp) | |||||
| ST $f23,144($sp) | |||||
| #endif | #endif | ||||
| .align 4 | .align 4 | ||||
| @@ -172,16 +172,12 @@ | |||||
| dsra J, N, 2 # NR=4 | dsra J, N, 2 # NR=4 | ||||
| dsll LDC, LDC, BASE_SHIFT# LDC*SIZE | dsll LDC, LDC, BASE_SHIFT# LDC*SIZE | ||||
| #if defined(TRMMKERNEL) | |||||
| LD OFFSET, 192($fp) | |||||
| #endif | |||||
| #if defined(TRMMKERNEL) && !defined(LEFT) | #if defined(TRMMKERNEL) && !defined(LEFT) | ||||
| neg KK, OFFSET | neg KK, OFFSET | ||||
| #endif | #endif | ||||
| blez J, .L2 | blez J, .L2 | ||||
| ST ALPHA, 152($fp) | |||||
| ST ALPHA, 152($sp) | |||||
| .L48: | .L48: | ||||
| dsra I, M, 3 # MR=8 | dsra I, M, 3 # MR=8 | ||||
| @@ -4670,7 +4666,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L480 | blez L, .L480 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADPS C11, C11, A1, B1 | MADPS C11, C11, A1, B1 | ||||
| MADPS C21, C21, A2, B1 | MADPS C21, C21, A2, B1 | ||||
| @@ -5273,7 +5269,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L440 | blez L, .L440 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADPS C11, C11, A1, B1 | MADPS C11, C11, A1, B1 | ||||
| MADPS C21, C21, A2, B1 | MADPS C21, C21, A2, B1 | ||||
| @@ -5653,7 +5649,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L420 | blez L, .L420 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADPS C11, C11, A1, B1 | MADPS C11, C11, A1, B1 | ||||
| MADPS C12, C12, A1, B2 | MADPS C12, C12, A1, B2 | ||||
| @@ -5968,7 +5964,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L410 | blez L, .L410 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADD C11, C11, A1, B1 | MADD C11, C11, A1, B1 | ||||
| MADD C12, C12, A1, B2 | MADD C12, C12, A1, B2 | ||||
| @@ -6258,7 +6254,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L280 | blez L, .L280 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADD C13, C13, A5, B1 | MADD C13, C13, A5, B1 | ||||
| MADD C23, C23, A6, B1 | MADD C23, C23, A6, B1 | ||||
| @@ -6574,7 +6570,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L240 | blez L, .L240 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADD C11, C11, A1, B1 | MADD C11, C11, A1, B1 | ||||
| MADD C21, C21, A2, B1 | MADD C21, C21, A2, B1 | ||||
| @@ -6784,7 +6780,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L220 | blez L, .L220 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADD C11, C11, A1, B1 | MADD C11, C11, A1, B1 | ||||
| MADD C21, C21, A2, B1 | MADD C21, C21, A2, B1 | ||||
| @@ -6953,7 +6949,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L210 | blez L, .L210 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADD C11, C11, A1, B1 | MADD C11, C11, A1, B1 | ||||
| MADD C12, C12, A1, B2 | MADD C12, C12, A1, B2 | ||||
| @@ -7204,7 +7200,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L180 | blez L, .L180 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADD C13, C13, A5, B1 | MADD C13, C13, A5, B1 | ||||
| MADD C23, C23, A6, B1 | MADD C23, C23, A6, B1 | ||||
| @@ -7435,7 +7431,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L140 | blez L, .L140 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADD C11, C11, A1, B1 | MADD C11, C11, A1, B1 | ||||
| MADD C21, C21, A2, B1 | MADD C21, C21, A2, B1 | ||||
| @@ -7597,7 +7593,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L120 | blez L, .L120 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADD C11, C11, A1, B1 | MADD C11, C11, A1, B1 | ||||
| MADD C21, C21, A2, B1 | MADD C21, C21, A2, B1 | ||||
| @@ -7730,7 +7726,7 @@ | |||||
| andi L, TEMP, 1 | andi L, TEMP, 1 | ||||
| #endif | #endif | ||||
| blez L, .L110 | blez L, .L110 | ||||
| LD ALPHA, 152($fp) | |||||
| LD ALPHA, 152($sp) | |||||
| MADD C11, C11, A1, B1 | MADD C11, C11, A1, B1 | ||||
| daddiu AO, AO, 1 * SIZE | daddiu AO, AO, 1 * SIZE | ||||
| @@ -7762,35 +7758,33 @@ | |||||
| NOP | NOP | ||||
| .L999: | .L999: | ||||
| ld $16, 0($fp) | |||||
| ld $17, 8($fp) | |||||
| ld $18, 16($fp) | |||||
| ld $19, 24($fp) | |||||
| ld $20, 32($fp) | |||||
| ld $21, 40($fp) | |||||
| ld $22, 48($fp) | |||||
| LD $f24, 56($fp) | |||||
| LD $f25, 64($fp) | |||||
| LD $f26, 72($fp) | |||||
| LD $f27, 80($fp) | |||||
| LD $f28, 88($fp) | |||||
| ld $16, 0($sp) | |||||
| ld $17, 8($sp) | |||||
| ld $18, 16($sp) | |||||
| ld $19, 24($sp) | |||||
| ld $20, 32($sp) | |||||
| ld $21, 40($sp) | |||||
| ld $22, 48($sp) | |||||
| LD $f24, 56($sp) | |||||
| LD $f25, 64($sp) | |||||
| LD $f26, 72($sp) | |||||
| LD $f27, 80($sp) | |||||
| LD $f28, 88($sp) | |||||
| #if defined(TRMMKERNEL) | #if defined(TRMMKERNEL) | ||||
| ld $23, 96($fp) | |||||
| ld $24, 104($fp) | |||||
| ld $25, 112($fp) | |||||
| ld $23, 96($sp) | |||||
| ld $24, 104($sp) | |||||
| ld $25, 112($sp) | |||||
| #endif | #endif | ||||
| #ifndef __64BIT__ | #ifndef __64BIT__ | ||||
| LD $f20,120($fp) | |||||
| LD $f21,128($fp) | |||||
| LD $f22,136($fp) | |||||
| LD $f23,144($fp) | |||||
| LD $f20,120($sp) | |||||
| LD $f21,128($sp) | |||||
| LD $f22,136($sp) | |||||
| LD $f23,144($sp) | |||||
| #endif | #endif | ||||
| move $sp,$fp | |||||
| ld $fp,184($sp) | |||||
| daddiu $sp,$sp,STACKSIZE | daddiu $sp,$sp,STACKSIZE | ||||
| j $31 | j $31 | ||||
| nop | nop | ||||