| @@ -53,9 +53,86 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| PROLOGUE /* Entry: set up constants, then pick a zero-fill path below or the path at .L50 */ | |||
| li.d TEMP, SIZE /* TEMP = SIZE; compared below against the shifted dummy2 and INCX */ | |||
| ld.d XX, $sp, 0 // Load dummy2 from offset 0 of the stack pointer | |||
| slli.d XX, XX, BASE_SHIFT /* shift dummy2 by BASE_SHIFT so it can be compared with TEMP */ | |||
| MTC a1, $r0 /* a1 <- $r0 (zero register); presumably a1 = 0.0, confirm against the MTC macro */ | |||
| slli.d INCX, INCX, BASE_SHIFT /* INCX <<= BASE_SHIFT; used below as the pointer step added to X */ | |||
| bge $r0, N, .L999 /* N <= 0: nothing to do */ | |||
| CMPEQ $fcc0, ALPHA, a1 /* $fcc0 = (ALPHA == a1) */ | |||
| bceqz $fcc0, .L50 /* ALPHA != a1: take the path at .L50 */ | |||
| beq XX, TEMP, .L50 // if dummy2 == 1, do not directly copy 0 | |||
| srai.d I, N, 3 /* I = N >> 3: number of full groups of 8 elements */ | |||
| bne INCX, TEMP, .L20 /* shifted INCX != SIZE: use the strided fill at .L20 */ | |||
| bge $r0, I, .L15 /* no full group of 8: go straight to the tail at .L15 */ | |||
| .align 3 | |||
| .L12: /* Contiguous fill: store a1 to 8 consecutive elements per iteration; I = groups of 8 left */ | |||
| ST a1, X, 0 * SIZE | |||
| ST a1, X, 1 * SIZE | |||
| ST a1, X, 2 * SIZE | |||
| ST a1, X, 3 * SIZE | |||
| ST a1, X, 4 * SIZE | |||
| ST a1, X, 5 * SIZE | |||
| ST a1, X, 6 * SIZE | |||
| ST a1, X, 7 * SIZE | |||
| addi.d I, I, -1 /* 64-bit decrement, matching the other loops here; addi.w would keep only the low 32 bits of the count */ | |||
| addi.d X, X, 8 * SIZE /* advance X past the 8 elements just written */ | |||
| blt $r0, I, .L12 /* repeat while I > 0 */ | |||
| .align 3 | |||
| .L15: /* Contiguous fill, tail: handle the N & 7 elements left after the groups of 8 */ | |||
| andi I, N, 7 /* I = N & 7: leftover element count */ | |||
| bge $r0, I, .L999 /* no leftovers: exit through .L999 */ | |||
| .align 3 | |||
| .L16: /* one element per iteration */ | |||
| ST a1, X, 0 * SIZE /* store a1 to the current element */ | |||
| addi.d I, I, -1 | |||
| addi.d X, X, SIZE /* step X by SIZE to the next element */ | |||
| blt $r0, I, .L16 /* repeat while I > 0 */ | |||
| move $r4, $r17 /* NOTE(review): copies $r17 into $r4 before returning; the register aliases are defined outside this view */ | |||
| fmov.d $f0, $f22 /* NOTE(review): copies $f22 into $f0 before returning; confirm which aliases these are */ | |||
| jirl $r0, $r1, 0x0 /* return: jump to the address in $r1 without linking */ | |||
| .align 3 | |||
| .L20: /* Strided fill: store a1 to every element, stepping X by INCX between stores */ | |||
| srai.d I, N, 3 /* I = N >> 3: number of full groups of 8 elements */ | |||
| bge $r0, I, .L25 /* no full group of 8: go to the tail at .L25 */ | |||
| .align 3 | |||
| .L22: /* 8 stores per iteration, each followed by X += INCX */ | |||
| ST a1, X, 0 * SIZE | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 * SIZE | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 * SIZE | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 * SIZE | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 * SIZE | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 * SIZE | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 * SIZE | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 * SIZE | |||
| addi.d I, I, -1 /* counter update placed between the last store and its pointer step */ | |||
| add.d X, X, INCX | |||
| blt $r0, I, .L22 /* repeat while I > 0 */ | |||
| .align 3 | |||
| .L25: /* tail: the N & 7 elements left after the groups of 8 */ | |||
| andi I, N, 7 /* I = N & 7: leftover element count */ | |||
| bge $r0, I, .L999 /* no leftovers: exit through .L999 */ | |||
| .align 3 | |||
| .L26: /* one element per iteration */ | |||
| addi.d I, I, -1 | |||
| ST a1, X, 0 * SIZE | |||
| add.d X, X, INCX | |||
| blt $r0, I, .L26 /* repeat while I > 0 */ | |||
| move $r4, $r17 /* NOTE(review): copies $r17 into $r4 before returning; the register aliases are defined outside this view */ | |||
| fmov.d $f0, $f22 /* NOTE(review): copies $f22 into $f0 before returning; confirm which aliases these are */ | |||
| jirl $r0, $r1, 0x0 /* return: jump to the address in $r1 without linking */ | |||
| .align 3 | |||
| .L50: | |||
| srai.d I, N, 3 | |||
| @@ -52,17 +52,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| bge $r0, N, .L999 /* N <= 0: nothing to do */ | |||
| bge $r0, INCX, .L999 /* INCX <= 0: nothing to do */ | |||
| li.d TEMP, 1 /* TEMP = 1; used to build a2 and, once shifted, as the value INCX and t1 are compared with */ | |||
| ld.d t1, $sp, 0 // Load dummy2 | |||
| movgr2fr.d a1, $r0 /* a1 <- bits of $r0 (zero register) */ | |||
| FFINT a1, a1 /* presumably integer-to-float conversion, giving a1 = 0.0; confirm against the FFINT macro */ | |||
| movgr2fr.d a2, TEMP /* a2 <- integer 1 */ | |||
| FFINT a2, a2 /* presumably gives a2 = 1.0; confirm against the FFINT macro */ | |||
| slli.d TEMP, TEMP, BASE_SHIFT /* TEMP = 1 << BASE_SHIFT */ | |||
| slli.d INCX, INCX, BASE_SHIFT /* INCX <<= BASE_SHIFT; used later as the pointer step added to X */ | |||
| slli.d t1, t1, BASE_SHIFT /* shift dummy2 the same way so it can be compared with TEMP */ | |||
| CMPEQ $fcc0, ALPHA, a1 /* $fcc0 = (ALPHA == a1) */ | |||
| bcnez $fcc0, .L20 //ALPHA==0 | |||
| CMPEQ $fcc0, ALPHA, a2 /* $fcc0 = (ALPHA == a2) */ | |||
| bcnez $fcc0, .L999 //ALPHA==1 return | |||
| .L1: | |||
| srai.d I, N, 3 | |||
| beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1 | |||
| beq INCX, TEMP, .L30 //ALPHA !=0|1 and INCX==1 | |||
| MTG TEMP, ALPHA | |||
| #ifdef DOUBLE | |||
| xvreplgr2vr.d VALPHA, TEMP | |||
| @@ -72,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| move XX, X | |||
| .align 3 | |||
| .L10: //ALPHA!=1 and INCX!=1 | |||
| .L10: //ALPHA !=0|1 and INCX!=1 | |||
| bge $r0, I, .L32 | |||
| .align 3 | |||
| .L11: | |||
| @@ -165,6 +169,75 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| blt $r0, I, .L11 | |||
| b .L32 | |||
| .align 3 | |||
| .L20: /* ALPHA == 0 path: store a1 over X unless dummy2 == 1 */ | |||
| beq t1, TEMP, .L1 // if dummy2 == 1, do not directly copy 0 | |||
| srai.d I, N, 3 /* I = N >> 3: number of full groups of 8 elements */ | |||
| beq INCX, TEMP, .L24 /* shifted INCX equals shifted 1: use the vector fill at .L24 */ | |||
| bge $r0, I, .L22 /* no full group of 8: go to the tail at .L22 */ | |||
| .align 3 | |||
| .L21: /* strided fill: 8 stores per iteration, each followed by X += INCX */ | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| addi.d I, I, -1 | |||
| blt $r0, I, .L21 /* repeat while I > 0 */ | |||
| .align 3 | |||
| .L22: /* strided tail: the N & 7 elements left after the groups of 8 */ | |||
| andi I, N, 7 /* I = N & 7: leftover element count */ | |||
| bge $r0, I, .L999 /* no leftovers: exit through .L999 */ | |||
| .align 3 | |||
| .L23: /* one element per iteration */ | |||
| ST a1, X, 0 * SIZE | |||
| addi.d I, I, -1 | |||
| add.d X, X, INCX | |||
| blt $r0, I, .L23 /* repeat while I > 0 */ | |||
| jirl $r0, $r1, 0 /* return: jump to the address in $r1 without linking */ | |||
| .align 3 | |||
| .L24: /* ALPHA == 0, contiguous X: clear 8 elements per iteration with vector stores */ | |||
| bge $r0, I, .L26 /*N<8 INCX==1*/ | |||
| xvxor.v VX0, VX0, VX0 /* VX0 = 0, computed once here because it does not change inside the loop */ | |||
| .align 3 | |||
| .L25: /* one xvst covers the 8 elements when DOUBLE is not defined; DOUBLE needs a second one at offset 4 */ | |||
| xvst VX0, X, 0 * SIZE | |||
| #ifdef DOUBLE | |||
| xvst VX0, X, 4 * SIZE | |||
| #endif | |||
| addi.d I, I, -1 | |||
| addi.d X, X, 8 * SIZE /* advance X past the 8 elements just cleared */ | |||
| blt $r0, I, .L25 /* repeat while I > 0 */ | |||
| .align 3 | |||
| .L26: /* tail: the N & 7 elements left after the groups of 8 */ | |||
| andi I, N, 7 /* I = N & 7: leftover element count */ | |||
| bge $r0, I, .L999 /* no leftovers: exit through .L999 */ | |||
| .align 3 | |||
| .L27: /* one scalar store of a1 per iteration */ | |||
| ST a1, X, 0 * SIZE | |||
| addi.d I, I, -1 | |||
| addi.d X, X, SIZE /* step X by SIZE to the next element */ | |||
| blt $r0, I, .L27 /* repeat while I > 0 */ | |||
| jirl $r0, $r1, 0 /* return: jump to the address in $r1 without linking */ | |||
| .align 3 | |||
| .L30: | |||
| bge $r0, I, .L32/*N<8 INCX==1*/ | |||
| MTG TEMP, ALPHA | |||
| @@ -51,6 +51,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| bge $r0, N, .L999 | |||
| bge $r0, INCX, .L999 | |||
| ld.d t1, $sp, 0 // Load dummy2 | |||
| li.d TEMP, 1 | |||
| movgr2fr.d a1, $r0 | |||
| FFINT a1, a1 | |||
| @@ -58,10 +59,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| FFINT a2, a2 | |||
| slli.d TEMP, TEMP, BASE_SHIFT | |||
| slli.d INCX, INCX, BASE_SHIFT | |||
| slli.d t1, t1, BASE_SHIFT | |||
| CMPEQ $fcc0, ALPHA, a1 | |||
| bcnez $fcc0, .L20 //ALPHA==0 | |||
| CMPEQ $fcc0, ALPHA, a2 | |||
| bcnez $fcc0, .L999 //ALPHA==1 return | |||
| .L1: | |||
| srai.d I, N, 3 | |||
| beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1 | |||
| beq INCX, TEMP, .L30 //ALPHA !=0|1 and INCX==1 | |||
| MTG TEMP, ALPHA | |||
| #ifdef DOUBLE | |||
| vreplgr2vr.d VALPHA, TEMP | |||
| @@ -71,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| move XX, X | |||
| .align 3 | |||
| .L10: //ALPHA!=1 and INCX!=1 | |||
| .L10: //ALPHA !=0|1 and INCX!=1 | |||
| bge $r0, I, .L32 | |||
| .align 3 | |||
| @@ -169,6 +174,79 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| b .L32 | |||
| .align 3 | |||
| .L20: /* ALPHA == 0 path: store a1 over X unless dummy2 == 1 */ | |||
| beq t1, TEMP, .L1 // if dummy2 == 1, do not directly copy 0 | |||
| srai.d I, N, 3 /* I = N >> 3: number of full groups of 8 elements */ | |||
| beq INCX, TEMP, .L24 /* shifted INCX equals shifted 1: use the vector fill at .L24 */ | |||
| bge $r0, I, .L22 /* no full group of 8: go to the tail at .L22 */ | |||
| .align 3 | |||
| .L21: /* strided fill: 8 stores per iteration, each followed by X += INCX */ | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| ST a1, X, 0 | |||
| add.d X, X, INCX | |||
| addi.d I, I, -1 | |||
| blt $r0, I, .L21 /* repeat while I > 0 */ | |||
| .align 3 | |||
| .L22: /* strided tail: the N & 7 elements left after the groups of 8 */ | |||
| andi I, N, 7 /* I = N & 7: leftover element count */ | |||
| bge $r0, I, .L999 /* no leftovers: exit through .L999 */ | |||
| .align 3 | |||
| .L23: /* one element per iteration */ | |||
| ST a1, X, 0 * SIZE | |||
| addi.d I, I, -1 | |||
| add.d X, X, INCX | |||
| blt $r0, I, .L23 /* repeat while I > 0 */ | |||
| jirl $r0, $r1, 0 /* return: jump to the address in $r1 without linking */ | |||
| .align 3 | |||
| .L24: /* ALPHA == 0, contiguous X: clear 8 elements per iteration with vector stores */ | |||
| bge $r0, I, .L26 /*N<8 INCX==1*/ | |||
| vxor.v VX0, VX0, VX0 /* VX0 = 0, computed once here because it does not change inside the loop */ | |||
| .align 3 | |||
| .L25: /* DOUBLE uses four vst at offsets 0, 2, 4, 6; otherwise two at offsets 0 and 4 cover the 8 elements */ | |||
| vst VX0, X, 0 * SIZE | |||
| #ifdef DOUBLE | |||
| vst VX0, X, 2 * SIZE | |||
| vst VX0, X, 4 * SIZE | |||
| vst VX0, X, 6 * SIZE | |||
| #else | |||
| vst VX0, X, 4 * SIZE | |||
| #endif | |||
| addi.d I, I, -1 | |||
| addi.d X, X, 8 * SIZE /* advance X past the 8 elements just cleared */ | |||
| blt $r0, I, .L25 /* repeat while I > 0 */ | |||
| .align 3 | |||
| .L26: /* tail: the N & 7 elements left after the groups of 8 */ | |||
| andi I, N, 7 /* I = N & 7: leftover element count */ | |||
| bge $r0, I, .L999 /* no leftovers: exit through .L999 */ | |||
| .align 3 | |||
| .L27: /* one scalar store of a1 per iteration */ | |||
| ST a1, X, 0 * SIZE | |||
| addi.d I, I, -1 | |||
| addi.d X, X, SIZE /* step X by SIZE to the next element */ | |||
| blt $r0, I, .L27 /* repeat while I > 0 */ | |||
| jirl $r0, $r1, 0 /* return: jump to the address in $r1 without linking */ | |||
| .align 3 | |||
| .L30: | |||
| bge $r0, I, .L32/*N<8 INCX==1*/ | |||
| MTG TEMP, ALPHA | |||