|
|
@@ -3,6 +3,26 @@ |
|
|
#include "functable.h" |
|
|
#include "functable.h" |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
/*
 * RISC-V Vector (RVV) intrinsic aliases used by the RISCV_SIMD code paths
 * of this routine.  The aliases hide the element width so the same loop
 * source can be used for both precisions: the f32 intrinsics are selected
 * when DOUBLE is not defined, the f64 intrinsics otherwise.
 */
#if defined(RISCV_SIMD) |
|
|
|
|
|
/* Single-precision variants (DOUBLE not defined). */
#if !defined(DOUBLE) |
|
|
|
|
|
/* VSETVL(n): yields the number of elements handled in this iteration;
 * the loops below subtract it from the remaining count. */
#define VSETVL(n) __riscv_vsetvl_e32m8(n) |
|
|
|
|
|
/* Vector register type holding the loaded x/y values and the results. */
#define FLOAT_V_T vfloat32m8_t |
|
|
|
|
|
/* Strided load: (base pointer, stride, vl).  Callers pass the stride in
 * bytes (increment * sizeof(FLOAT)). */
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8 |
|
|
|
|
|
/* Strided store: (base pointer, stride in bytes, vector value, vl). */
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8 |
|
|
|
|
|
/* (acc, scalar, vec, vl) -> acc + scalar * vec; used where the scalar
 * loop computes e.g. w * dh21 + z__. */
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 |
|
|
|
|
|
/* (vec, scalar, vl) -> vec * scalar. */
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 |
|
|
|
|
|
/* (acc, scalar, vec, vl) -> scalar * vec - acc; used where the scalar
 * loop computes -w + dh22 * z__. */
#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f32m8 |
|
|
|
|
|
/* Double-precision variants: same roles as above, 64-bit elements. */
#else |
|
|
|
|
|
#define VSETVL(n) __riscv_vsetvl_e64m8(n) |
|
|
|
|
|
#define FLOAT_V_T vfloat64m8_t |
|
|
|
|
|
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8 |
|
|
|
|
|
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8 |
|
|
|
|
|
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 |
|
|
|
|
|
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 |
|
|
|
|
|
#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f64m8 |
|
|
|
|
|
#endif |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
#ifndef CBLAS |
|
|
#ifndef CBLAS |
|
|
|
|
|
|
|
|
void NAME(blasint *N, FLOAT *dx, blasint *INCX, FLOAT *dy, blasint *INCY, FLOAT *dparam){ |
|
|
void NAME(blasint *N, FLOAT *dx, blasint *INCX, FLOAT *dy, blasint *INCY, FLOAT *dparam){ |
|
|
@@ -25,6 +45,11 @@ void CNAME(blasint n, FLOAT *dx, blasint incx, FLOAT *dy, blasint incy, FLOAT *d |
|
|
FLOAT dh11, dh12, dh22, dh21, dflag; |
|
|
FLOAT dh11, dh12, dh22, dh21, dflag; |
|
|
blasint nsteps; |
|
|
blasint nsteps; |
|
|
|
|
|
|
|
|
|
|
|
#if defined(RISCV_SIMD) |
|
|
|
|
|
FLOAT_V_T v_w, v_z__, v_dx, v_dy; |
|
|
|
|
|
blasint stride, stride_x, stride_y, offset; |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
#ifndef CBLAS |
|
|
#ifndef CBLAS |
|
|
PRINT_DEBUG_CNAME; |
|
|
PRINT_DEBUG_CNAME; |
|
|
#else |
|
|
#else |
|
|
@@ -53,6 +78,7 @@ L10: |
|
|
dh21 = dparam[3]; |
|
|
dh21 = dparam[3]; |
|
|
i__1 = nsteps; |
|
|
i__1 = nsteps; |
|
|
i__2 = incx; |
|
|
i__2 = incx; |
|
|
|
|
|
#if !defined(RISCV_SIMD) |
|
|
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { |
|
|
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { |
|
|
w = dx[i__]; |
|
|
w = dx[i__]; |
|
|
z__ = dy[i__]; |
|
|
z__ = dy[i__]; |
|
|
@@ -60,12 +86,36 @@ L10: |
|
|
dy[i__] = w * dh21 + z__; |
|
|
dy[i__] = w * dh21 + z__; |
|
|
/* L20: */ |
|
|
/* L20: */ |
|
|
} |
|
|
} |
|
|
|
|
|
#else |
|
|
|
|
|
if(i__2 < 0){ |
|
|
|
|
|
offset = i__1 - 2; |
|
|
|
|
|
dx += offset; |
|
|
|
|
|
dy += offset; |
|
|
|
|
|
i__1 = -i__1; |
|
|
|
|
|
i__2 = -i__2; |
|
|
|
|
|
} |
|
|
|
|
|
stride = i__2 * sizeof(FLOAT); |
|
|
|
|
|
n = i__1 / i__2; |
|
|
|
|
|
for (size_t vl; n > 0; n -= vl, dx += vl*i__2, dy += vl*i__2) { |
|
|
|
|
|
vl = VSETVL(n); |
|
|
|
|
|
|
|
|
|
|
|
v_w = VLSEV_FLOAT(&dx[1], stride, vl); |
|
|
|
|
|
v_z__ = VLSEV_FLOAT(&dy[1], stride, vl); |
|
|
|
|
|
|
|
|
|
|
|
v_dx = VFMACCVF_FLOAT(v_w, dh12, v_z__, vl); |
|
|
|
|
|
v_dy = VFMACCVF_FLOAT(v_z__, dh21, v_w, vl); |
|
|
|
|
|
|
|
|
|
|
|
VSSEV_FLOAT(&dx[1], stride, v_dx, vl); |
|
|
|
|
|
VSSEV_FLOAT(&dy[1], stride, v_dy, vl); |
|
|
|
|
|
} |
|
|
|
|
|
#endif |
|
|
goto L140; |
|
|
goto L140; |
|
|
L30: |
|
|
L30: |
|
|
dh11 = dparam[2]; |
|
|
dh11 = dparam[2]; |
|
|
dh22 = dparam[5]; |
|
|
dh22 = dparam[5]; |
|
|
i__2 = nsteps; |
|
|
i__2 = nsteps; |
|
|
i__1 = incx; |
|
|
i__1 = incx; |
|
|
|
|
|
#if !defined(RISCV_SIMD) |
|
|
for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { |
|
|
for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { |
|
|
w = dx[i__]; |
|
|
w = dx[i__]; |
|
|
z__ = dy[i__]; |
|
|
z__ = dy[i__]; |
|
|
@@ -73,6 +123,29 @@ L30: |
|
|
dy[i__] = -w + dh22 * z__; |
|
|
dy[i__] = -w + dh22 * z__; |
|
|
/* L40: */ |
|
|
/* L40: */ |
|
|
} |
|
|
} |
|
|
|
|
|
#else |
|
|
|
|
|
if(i__1 < 0){ |
|
|
|
|
|
offset = i__2 - 2; |
|
|
|
|
|
dx += offset; |
|
|
|
|
|
dy += offset; |
|
|
|
|
|
i__1 = -i__1; |
|
|
|
|
|
i__2 = -i__2; |
|
|
|
|
|
} |
|
|
|
|
|
stride = i__1 * sizeof(FLOAT); |
|
|
|
|
|
n = i__2 / i__1; |
|
|
|
|
|
for (size_t vl; n > 0; n -= vl, dx += vl*i__1, dy += vl*i__1) { |
|
|
|
|
|
vl = VSETVL(n); |
|
|
|
|
|
|
|
|
|
|
|
v_w = VLSEV_FLOAT(&dx[1], stride, vl); |
|
|
|
|
|
v_z__ = VLSEV_FLOAT(&dy[1], stride, vl); |
|
|
|
|
|
|
|
|
|
|
|
v_dx = VFMACCVF_FLOAT(v_z__, dh11, v_w, vl); |
|
|
|
|
|
v_dy = VFMSACVF_FLOAT(v_w, dh22, v_z__, vl); |
|
|
|
|
|
|
|
|
|
|
|
VSSEV_FLOAT(&dx[1], stride, v_dx, vl); |
|
|
|
|
|
VSSEV_FLOAT(&dy[1], stride, v_dy, vl); |
|
|
|
|
|
} |
|
|
|
|
|
#endif |
|
|
goto L140; |
|
|
goto L140; |
|
|
L50: |
|
|
L50: |
|
|
dh11 = dparam[2]; |
|
|
dh11 = dparam[2]; |
|
|
@@ -81,6 +154,7 @@ L50: |
|
|
dh22 = dparam[5]; |
|
|
dh22 = dparam[5]; |
|
|
i__1 = nsteps; |
|
|
i__1 = nsteps; |
|
|
i__2 = incx; |
|
|
i__2 = incx; |
|
|
|
|
|
#if !defined(RISCV_SIMD) |
|
|
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { |
|
|
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { |
|
|
w = dx[i__]; |
|
|
w = dx[i__]; |
|
|
z__ = dy[i__]; |
|
|
z__ = dy[i__]; |
|
|
@@ -88,6 +162,31 @@ L50: |
|
|
dy[i__] = w * dh21 + z__ * dh22; |
|
|
dy[i__] = w * dh21 + z__ * dh22; |
|
|
/* L60: */ |
|
|
/* L60: */ |
|
|
} |
|
|
} |
|
|
|
|
|
#else |
|
|
|
|
|
if(i__2 < 0){ |
|
|
|
|
|
offset = i__1 - 2; |
|
|
|
|
|
dx += offset; |
|
|
|
|
|
dy += offset; |
|
|
|
|
|
i__1 = -i__1; |
|
|
|
|
|
i__2 = -i__2; |
|
|
|
|
|
} |
|
|
|
|
|
stride = i__2 * sizeof(FLOAT); |
|
|
|
|
|
n = i__1 / i__2; |
|
|
|
|
|
for (size_t vl; n > 0; n -= vl, dx += vl*i__2, dy += vl*i__2) { |
|
|
|
|
|
vl = VSETVL(n); |
|
|
|
|
|
|
|
|
|
|
|
v_w = VLSEV_FLOAT(&dx[1], stride, vl); |
|
|
|
|
|
v_z__ = VLSEV_FLOAT(&dy[1], stride, vl); |
|
|
|
|
|
|
|
|
|
|
|
v_dx = VFMULVF_FLOAT(v_w, dh11, vl); |
|
|
|
|
|
v_dx = VFMACCVF_FLOAT(v_dx, dh12, v_z__, vl); |
|
|
|
|
|
VSSEV_FLOAT(&dx[1], stride, v_dx, vl); |
|
|
|
|
|
|
|
|
|
|
|
v_dy = VFMULVF_FLOAT(v_w, dh21, vl); |
|
|
|
|
|
v_dy = VFMACCVF_FLOAT(v_dy, dh22, v_z__, vl); |
|
|
|
|
|
VSSEV_FLOAT(&dy[1], stride, v_dy, vl); |
|
|
|
|
|
} |
|
|
|
|
|
#endif |
|
|
goto L140; |
|
|
goto L140; |
|
|
L70: |
|
|
L70: |
|
|
kx = 1; |
|
|
kx = 1; |
|
|
@@ -110,6 +209,7 @@ L80: |
|
|
dh12 = dparam[4]; |
|
|
dh12 = dparam[4]; |
|
|
dh21 = dparam[3]; |
|
|
dh21 = dparam[3]; |
|
|
i__2 = n; |
|
|
i__2 = n; |
|
|
|
|
|
#if !defined(RISCV_SIMD) |
|
|
for (i__ = 1; i__ <= i__2; ++i__) { |
|
|
for (i__ = 1; i__ <= i__2; ++i__) { |
|
|
w = dx[kx]; |
|
|
w = dx[kx]; |
|
|
z__ = dy[ky]; |
|
|
z__ = dy[ky]; |
|
|
@@ -119,11 +219,36 @@ L80: |
|
|
ky += incy; |
|
|
ky += incy; |
|
|
/* L90: */ |
|
|
/* L90: */ |
|
|
} |
|
|
} |
|
|
|
|
|
#else |
|
|
|
|
|
if(incx < 0){ |
|
|
|
|
|
incx = -incx; |
|
|
|
|
|
dx -= n*incx; |
|
|
|
|
|
} |
|
|
|
|
|
if(incy < 0){ |
|
|
|
|
|
incy = -incy; |
|
|
|
|
|
dy -= n*incy; |
|
|
|
|
|
} |
|
|
|
|
|
stride_x = incx * sizeof(FLOAT); |
|
|
|
|
|
stride_y = incy * sizeof(FLOAT); |
|
|
|
|
|
for (size_t vl; n > 0; n -= vl, dx += vl*incx, dy += vl*incy) { |
|
|
|
|
|
vl = VSETVL(n); |
|
|
|
|
|
|
|
|
|
|
|
v_w = VLSEV_FLOAT(&dx[kx], stride_x, vl); |
|
|
|
|
|
v_z__ = VLSEV_FLOAT(&dy[ky], stride_y, vl); |
|
|
|
|
|
|
|
|
|
|
|
v_dx = VFMACCVF_FLOAT(v_w, dh12, v_z__, vl); |
|
|
|
|
|
v_dy = VFMACCVF_FLOAT(v_z__, dh21, v_w, vl); |
|
|
|
|
|
|
|
|
|
|
|
VSSEV_FLOAT(&dx[kx], stride_x, v_dx, vl); |
|
|
|
|
|
VSSEV_FLOAT(&dy[ky], stride_y, v_dy, vl); |
|
|
|
|
|
} |
|
|
|
|
|
#endif |
|
|
goto L140; |
|
|
goto L140; |
|
|
L100: |
|
|
L100: |
|
|
dh11 = dparam[2]; |
|
|
dh11 = dparam[2]; |
|
|
dh22 = dparam[5]; |
|
|
dh22 = dparam[5]; |
|
|
i__2 = n; |
|
|
i__2 = n; |
|
|
|
|
|
#if !defined(RISCV_SIMD) |
|
|
for (i__ = 1; i__ <= i__2; ++i__) { |
|
|
for (i__ = 1; i__ <= i__2; ++i__) { |
|
|
w = dx[kx]; |
|
|
w = dx[kx]; |
|
|
z__ = dy[ky]; |
|
|
z__ = dy[ky]; |
|
|
@@ -133,8 +258,33 @@ L100: |
|
|
ky += incy; |
|
|
ky += incy; |
|
|
/* L110: */ |
|
|
/* L110: */ |
|
|
} |
|
|
} |
|
|
|
|
|
#else |
|
|
|
|
|
if(incx < 0){ |
|
|
|
|
|
incx = -incx; |
|
|
|
|
|
dx -= n*incx; |
|
|
|
|
|
} |
|
|
|
|
|
if(incy < 0){ |
|
|
|
|
|
incy = -incy; |
|
|
|
|
|
dy -= n*incy; |
|
|
|
|
|
} |
|
|
|
|
|
stride_x = incx * sizeof(FLOAT); |
|
|
|
|
|
stride_y = incy * sizeof(FLOAT); |
|
|
|
|
|
for (size_t vl; n > 0; n -= vl, dx += vl*incx, dy += vl*incy) { |
|
|
|
|
|
vl = VSETVL(n); |
|
|
|
|
|
|
|
|
|
|
|
v_w = VLSEV_FLOAT(&dx[kx], stride_x, vl); |
|
|
|
|
|
v_z__ = VLSEV_FLOAT(&dy[ky], stride_y, vl); |
|
|
|
|
|
|
|
|
|
|
|
v_dx = VFMACCVF_FLOAT(v_z__, dh11, v_w, vl); |
|
|
|
|
|
v_dy = VFMSACVF_FLOAT(v_w, dh22, v_z__, vl); |
|
|
|
|
|
|
|
|
|
|
|
VSSEV_FLOAT(&dx[kx], stride_x, v_dx, vl); |
|
|
|
|
|
VSSEV_FLOAT(&dy[ky], stride_y, v_dy, vl); |
|
|
|
|
|
} |
|
|
|
|
|
#endif |
|
|
goto L140; |
|
|
goto L140; |
|
|
L120: |
|
|
L120: |
|
|
|
|
|
#if !defined(RISCV_SIMD) |
|
|
dh11 = dparam[2]; |
|
|
dh11 = dparam[2]; |
|
|
dh12 = dparam[4]; |
|
|
dh12 = dparam[4]; |
|
|
dh21 = dparam[3]; |
|
|
dh21 = dparam[3]; |
|
|
@@ -149,6 +299,32 @@ L120: |
|
|
ky += incy; |
|
|
ky += incy; |
|
|
/* L130: */ |
|
|
/* L130: */ |
|
|
} |
|
|
} |
|
|
|
|
|
#else |
|
|
|
|
|
if(incx < 0){ |
|
|
|
|
|
incx = -incx; |
|
|
|
|
|
dx -= n*incx; |
|
|
|
|
|
} |
|
|
|
|
|
if(incy < 0){ |
|
|
|
|
|
incy = -incy; |
|
|
|
|
|
dy -= n*incy; |
|
|
|
|
|
} |
|
|
|
|
|
stride_x = incx * sizeof(FLOAT); |
|
|
|
|
|
stride_y = incy * sizeof(FLOAT); |
|
|
|
|
|
for (size_t vl; n > 0; n -= vl, dx += vl*incx, dy += vl*incy) { |
|
|
|
|
|
vl = VSETVL(n); |
|
|
|
|
|
|
|
|
|
|
|
v_w = VLSEV_FLOAT(&dx[kx], stride_x, vl); |
|
|
|
|
|
v_z__ = VLSEV_FLOAT(&dy[ky], stride_y, vl); |
|
|
|
|
|
|
|
|
|
|
|
v_dx = VFMULVF_FLOAT(v_w, dh11, vl); |
|
|
|
|
|
v_dx = VFMACCVF_FLOAT(v_dx, dh12, v_z__, vl); |
|
|
|
|
|
VSSEV_FLOAT(&dx[kx], stride_x, v_dx, vl); |
|
|
|
|
|
|
|
|
|
|
|
v_dy = VFMULVF_FLOAT(v_w, dh21, vl); |
|
|
|
|
|
v_dy = VFMACCVF_FLOAT(v_dy, dh22, v_z__, vl); |
|
|
|
|
|
VSSEV_FLOAT(&dy[ky], stride_y, v_dy, vl); |
|
|
|
|
|
} |
|
|
|
|
|
#endif |
|
|
L140: |
|
|
L140: |
|
|
return; |
|
|
return; |
|
|
} |
|
|
} |
|
|
|