| @@ -47,6 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define VX4 $xr21 /* vector temporary; its low half ($vr21) is written in the .L996 reduction */| |||
| #define res1 $xr19 /* vector accumulator updated by the xvfmadd sum-of-squares steps */| |||
| #define res2 $xr20 /* second vector accumulator; added into res1 at .L996 */| |||
| #define RCP $f2 /* scalar scale factor, written by frecip.s from $f0 after the camax_k call */| |||
| #define VALPHA $xr3 /* vector multiplier used by the xvfmul.s scaling steps */| |||
| PROLOGUE | |||
| @@ -55,10 +57,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| LDINT INCX, 0(INCX) | |||
| #endif | |||
| xvxor.v res1, res1, res1 /* res1 = 0 (register XORed with itself) */| |||
| xvxor.v res2, res2, res2 /* res2 = 0 */| |||
| bge $r0, N, .L999 /* leave early when N <= 0 */| |||
| beq $r0, INCX, .L999 /* leave early when INCX == 0. NOTE(review): no visible instruction writes $f0 before these two exits, yet .L999 contains fmul.s $f0, res, $f0; confirm the early-exit result is well defined */| |||
| addi.d $sp, $sp, -32 /* reserve a 32-byte frame for the four 8-byte saves below */| |||
| st.d $ra, $sp, 0 /* bl overwrites $ra, so keep the caller's return address */| |||
| st.d N, $sp, 8 /* N, X and INCX are saved here and reloaded after the call */| |||
| st.d X, $sp, 16 | |||
| st.d INCX, $sp, 24 | |||
| #ifdef DYNAMIC_ARCH | |||
| bl camax_k_LA264 /* NOTE(review): target-suffixed symbol is hard-coded; confirm it matches the DYNAMIC_ARCH naming for every build this file is used in */| |||
| #else | |||
| bl camax_k /* result is consumed from $f0 below; presumably the maximum absolute value of X, confirm against camax_k */| |||
| #endif | |||
| ld.d $ra, $sp, 0 /* restore everything saved before the call */| |||
| ld.d N, $sp, 8 | |||
| ld.d X, $sp, 16 | |||
| ld.d INCX, $sp, 24 | |||
| addi.d $sp, $sp, 32 /* release the frame */| |||
| frecip.s RCP, $f0 /* RCP = 1 / $f0 */| |||
| vreplvei.w $vr3, $vr2, 0 /* copy word 0 of $vr2 (the register that holds RCP) into every word of $vr3 */| |||
| xvpermi.d VALPHA, $xr3,0x00 /* replicate doubleword 0 into all four doublewords, so every lane of VALPHA holds RCP */| |||
| xvxor.v res1, res1, res1 /* clear both accumulators after the call */| |||
| xvxor.v res2, res2, res2 | |||
| fcmp.ceq.s $fcc0, $f0, $f19 /* $f19 is the low scalar of res1 ($xr19), just zeroed: tests $f0 == 0 */| |||
| bcnez $fcc0, .L999 /* helper returned zero: skip the accumulation loops */| |||
| li.d TEMP, SIZE | |||
| slli.d INCX, INCX, ZBASE_SHIFT /* INCX = INCX * 2^ZBASE_SHIFT; presumably converts the stride to bytes, confirm */| |||
| srai.d I, N, 2 /* I = N >> 2, the vector-loop trip count */| |||
| @@ -67,13 +92,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .align 3 | |||
| .L10: /* contiguous-access loop. NOTE(review): this span is a rendered diff hunk, so the double-precision variant and the scaled single-precision variant of the body both appear below */| |||
| xvld VX0, X, 0 * SIZE | |||
| xvfcvtl.d.s VX1, VX0 /* double-precision variant: widen the low and high float lanes to double */| |||
| xvfcvth.d.s VX2, VX0 | |||
| xvfmadd.d res1, VX1, VX1, res1 /* res1 += VX1 * VX1 */| |||
| xvfmadd.d res2, VX2, VX2, res2 /* res2 += VX2 * VX2 */| |||
| addi.d I, I, -1 /* one fewer iteration remaining */| |||
| addi.d X, X, 8 * SIZE /* double-precision variant: advance past the single vector load */| |||
| xvld VX0, X, 0 * SIZE /* single-precision variant: two 256-bit loads per iteration */| |||
| xvld VX1, X, 8 * SIZE | |||
| xvfmul.s VX0, VX0, VALPHA /* scale every lane by the broadcast reciprocal before squaring */| |||
| xvfmul.s VX1, VX1, VALPHA | |||
| xvfmadd.s res1, VX0, VX0, res1 /* res1 += VX0 * VX0 */| |||
| xvfmadd.s res2, VX1, VX1, res2 /* res2 += VX1 * VX1 */| |||
| addi.d X, X, 16 * SIZE /* NOTE(review): X advances by 16 * SIZE per iteration while the trip count is still N >> 2; confirm the elements consumed per iteration match that count */| |||
| blt $r0, I, .L10 /* repeat while I > 0 */| |||
| .align 3 | |||
| b .L996 /* go to the horizontal reduction */| |||
| @@ -103,22 +131,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| xvinsgr2vr.w VX0, t3, 6 /* tail of the .L21 loop body (its start is outside this hunk): place t3 and t4 into word lanes 6 and 7 of VX0 */| |||
| xvinsgr2vr.w VX0, t4, 7 | |||
| add.d X, X, INCX /* step X by the scaled stride */| |||
| xvfcvtl.d.s VX1, VX0 /* double-precision variant of the accumulation (this span is a rendered diff hunk holding both variants) */| |||
| xvfcvth.d.s VX2, VX0 | |||
| xvfmadd.d res1, VX1, VX1, res1 | |||
| xvfmadd.d res2, VX2, VX2, res2 | |||
| xvfmul.s VX0, VX0, VALPHA /* single-precision variant: scale, then res2 += VX0 * VX0 */| |||
| xvfmadd.s res2, VX0, VX0, res2 | |||
| addi.d I, I, -1 | |||
| blt $r0, I, .L21 /* repeat while I > 0 */| |||
| b .L996 | |||
| .L996: /* horizontal reduction of the vector accumulators */| |||
| xvfadd.d res1, res1, res2 /* double-precision variant: combine accumulators, then add doublewords 1..3 into res1 */| |||
| xvpickve.d VX1, res1, 1 | |||
| xvpickve.d VX2, res1, 2 | |||
| xvpickve.d VX3, res1, 3 | |||
| xvfadd.d res1, VX1, res1 | |||
| xvfadd.d res1, VX2, res1 | |||
| xvfadd.d res1, VX3, res1 | |||
| xvfadd.s res1, res1, res2 /* single-precision variant: res1 += res2 lane by lane */| |||
| xvpermi.d VX1, res1, 0x4e /* 0x4e picks doublewords 2,3,0,1, i.e. swaps the two 128-bit halves */| |||
| xvfadd.s res1, res1, VX1 /* each lane now holds low-half plus high-half */| |||
| vreplvei.w $vr17, $vr19, 1 /* broadcast word lanes 1, 2 and 3 of res1's low half; $vr21 is the low half of VX4, and $vr17/$vr18 presumably back VX2/VX3 (their defines are not visible here) */| |||
| vreplvei.w $vr18, $vr19, 2 | |||
| vreplvei.w $vr21, $vr19, 3 | |||
| xvfadd.s res1, VX2, res1 /* fold the three broadcast lanes into res1 */| |||
| xvfadd.s res1, VX3, res1 | |||
| xvfadd.s res1, VX4, res1 | |||
| .align 3 | |||
| .L997: | |||
| @@ -130,18 +158,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| fld.s a1, X, 0 * SIZE /* scalar loop body (.L998 label is outside this hunk): load two consecutive floats at X */| |||
| fld.s a2, X, 1 * SIZE | |||
| addi.d I, I, -1 | |||
| fcvt.d.s a1, a1 /* double-precision variant (this span is a rendered diff hunk holding both variants): widen, then res += a*a */| |||
| fcvt.d.s a2, a2 | |||
| fmadd.d res, a1, a1, res | |||
| fmadd.d res, a2, a2, res | |||
| fmul.s a1, a1, RCP /* single-precision variant: scale by RCP before squaring */| |||
| fmul.s a2, a2, RCP | |||
| fmadd.s res, a1, a1, res /* res += a1 * a1 */| |||
| fmadd.s res, a2, a2, res /* res += a2 * a2 */| |||
| add.d X, X, INCX /* step X by the scaled stride */| |||
| blt $r0, I, .L998 /* repeat while I > 0 */| |||
| .align 3 | |||
| .L999: /* common exit: finish the result and return */| |||
| fsqrt.d res, res /* double-precision variant of the square root */| |||
| fsqrt.s res, res /* single-precision variant: res = sqrt(res) */| |||
| fmul.s $f0, res, $f0 /* $f0 = res * $f0; assumes $f0 still holds the value that RCP was derived from, confirm nothing in the loops aliases $f0 */| |||
| move $r4, $r17 /* NOTE(review): copies $r17 into $r4; the purpose is not evident from the visible code */| |||
| fcvt.s.d $f0, res /* double-precision variant: narrow the result into $f0 */| |||
| jirl $r0, $r1, 0x0 /* jump to the address in $r1 without linking, i.e. return */| |||
| EPILOGUE | |||