Browse Source

loongarch: Fixed {s/d/sc/dz}amin LASX opt

tags/v0.3.27
gxw 2 years ago
parent
commit
a10dde5554
2 changed files with 23 additions and 8 deletions
  1. +3
    -3
      kernel/loongarch64/amin_lasx.S
  2. +20
    -5
      kernel/loongarch64/camin_lasx.S

+ 3
- 3
kernel/loongarch64/amin_lasx.S View File

@@ -160,8 +160,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.d VX1, t2, 1
xvinsgr2vr.d VX1, t3, 2
xvinsgr2vr.d VX1, t4, 3
xvfmaxa.d VM1, VX0, VX1
xvfmaxa.d VM0, VM0, VM1
xvfmina.d VM1, VX0, VX1
xvfmina.d VM0, VM0, VM1
#else
ld.w t1, X, 0
add.d X, X, INCX
@@ -187,7 +187,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w VM1, t2, 5
xvinsgr2vr.w VM1, t3, 6
xvinsgr2vr.w VM1, t4, 7
xvfmaxa.s VM0, VM0, VM1
xvfmina.s VM0, VM0, VM1
#endif
addi.d I, I, -1
blt $r0, I, .L21


+ 20
- 5
kernel/loongarch64/camin_lasx.S View File

@@ -116,15 +116,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef DOUBLE
xvpickve.d x1, VM0, 0
xvpickve.d x2, VM0, 1
XVFMIN VM0, x1, x2
XVFMIN VX0, x1, x2
xvpickve.d x1, VM0, 2
xvpickve.d x2, VM0, 3
XVFMIN VX1, x1, x2
XVFMIN VM0, VX0, VX1
#else
xvpickve.w x1, VM0, 0
xvpickve.w x2, VM0, 1
xvpickve.w x3, VM0, 2
xvpickve.w x4, VM0, 3
XVFMIN VX0, x1, x2
XVFMIN VX1, x3, x4
XVFMIN VX0, VX0, VX1
xvpickve.w x1, VM0, 4
xvpickve.w x2, VM0, 5
xvpickve.w x3, VM0, 6
xvpickve.w x4, VM0, 7
XVFMIN VM0, x1, x2
XVFMIN VM1, x3, x4
XVFMIN VM0, VM0, VM1
XVFMIN VM0, VM0, VX0
#endif
b .L23
.align 3
@@ -159,7 +171,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FABS t4, t4
ADD t1, t1, t2
ADD t3, t3, t4
FMIN s1, t1, t3
FMIN s2, t1, t3
LD t1, X, 0 * SIZE
LD t2, X, 1 * SIZE
add.d X, X, INCX
@@ -187,13 +199,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ADD t1, t1, t2
ADD t3, t3, t4
FMIN s4, t1, t3

FMIN s1, s1, s2
FMIN s3, s3, s4
FMIN a0, a0, s3
FMIN a0, a0, s1
blt $r0, I, .L21
.align 3

.L22:
FMIN s1, s1, s2
FMIN s3, s3, s4
FMIN s1, s1, s3
MOV s1, a0
.align 3

.L23: //N<8


Loading…
Cancel
Save