| @@ -35,7 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m8) | #define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m8) | ||||
| #define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m8) | #define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m8) | ||||
| #define VFMUL_VF_FLOAT RISCV_RVV(vfmul_vf_f32m8) | #define VFMUL_VF_FLOAT RISCV_RVV(vfmul_vf_f32m8) | ||||
| #define VFILL_ZERO_FLOAT RISCV_RVV(vfsub_vv_f32m8) | |||||
| #define VREINTERPRET_FLOAT RISCV_RVV(vreinterpret_v_i32m8_f32m8) | |||||
| #define VFILL_INT RISCV_RVV(vmv_v_x_i32m8) | |||||
| #else | #else | ||||
| #define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) | #define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) | ||||
| #define FLOAT_V_T vfloat64m4_t | #define FLOAT_V_T vfloat64m4_t | ||||
| @@ -45,7 +46,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) | #define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) | ||||
| #define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) | #define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) | ||||
| #define VFMUL_VF_FLOAT RISCV_RVV(vfmul_vf_f64m4) | #define VFMUL_VF_FLOAT RISCV_RVV(vfmul_vf_f64m4) | ||||
| #define VFILL_ZERO_FLOAT RISCV_RVV(vfsub_vv_f64m4) | |||||
| #define VREINTERPRET_FLOAT RISCV_RVV(vreinterpret_v_i64m4_f64m4) | |||||
| #define VFILL_INT RISCV_RVV(vmv_v_x_i64m4) | |||||
| #endif | #endif | ||||
| int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | ||||
| @@ -56,7 +58,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
| if(n < 0) return(0); | if(n < 0) return(0); | ||||
| FLOAT *a_ptr = a; | FLOAT *a_ptr = a; | ||||
| FLOAT temp[4]; | FLOAT temp[4]; | ||||
| FLOAT_V_T va0, va1, vy0, vy1,vy0_temp, vy1_temp , temp_v ,va0_0 , va0_1 , va1_0 ,va1_1 ,va2_0 ,va2_1 ,va3_0 ,va3_1 ; | |||||
| FLOAT_V_T va0, va1, vy0, vy1,vy0_temp, vy1_temp ,va0_0 , va0_1 , va1_0 ,va1_1 ,va2_0 ,va2_1 ,va3_0 ,va3_1 ; | |||||
| unsigned int gvl = 0; | unsigned int gvl = 0; | ||||
| if(inc_y == 1 && inc_x == 1){ | if(inc_y == 1 && inc_x == 1){ | ||||
| gvl = VSETVL(m); | gvl = VSETVL(m); | ||||
| @@ -66,8 +68,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
| ix = 0; | ix = 0; | ||||
| vy0_temp = VLEV_FLOAT(&y[j], gvl); | vy0_temp = VLEV_FLOAT(&y[j], gvl); | ||||
| vy1_temp = VLEV_FLOAT(&y[j+gvl], gvl); | vy1_temp = VLEV_FLOAT(&y[j+gvl], gvl); | ||||
| vy0 = VFILL_ZERO_FLOAT(vy0 , vy0 , gvl); | |||||
| vy1 = VFILL_ZERO_FLOAT(vy1 , vy1 , gvl); | |||||
| vy0 = VREINTERPRET_FLOAT(VFILL_INT(0, gvl)); | |||||
| vy1 = VREINTERPRET_FLOAT(VFILL_INT(0, gvl)); | |||||
| int i; | int i; | ||||
| int remainder = n % 4; | int remainder = n % 4; | ||||
| @@ -118,7 +120,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
| a_ptr = a; | a_ptr = a; | ||||
| ix = 0; | ix = 0; | ||||
| vy0_temp = VLEV_FLOAT(&y[j], gvl); | vy0_temp = VLEV_FLOAT(&y[j], gvl); | ||||
| vy0 = VFILL_ZERO_FLOAT(vy0 , vy0 , gvl); | |||||
| vy0 = VREINTERPRET_FLOAT(VFILL_INT(0, gvl)); | |||||
| int i; | int i; | ||||
| int remainder = n % 4; | int remainder = n % 4; | ||||
| @@ -251,4 +253,4 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
| } | } | ||||
| } | } | ||||
| return(0); | return(0); | ||||
| } | |||||
| } | |||||
| @@ -51,7 +51,7 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLO | |||||
| FLOAT *aptr,*bptr; | FLOAT *aptr,*bptr; | ||||
| size_t vl; | size_t vl; | ||||
| FLOAT_V_T va, vb,va1,vb1; | |||||
| FLOAT_V_T va,va1; | |||||
| if ( rows <= 0 ) return(0); | if ( rows <= 0 ) return(0); | ||||
| if ( cols <= 0 ) return(0); | if ( cols <= 0 ) return(0); | ||||
| @@ -81,7 +81,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| FLOAT_V_T_M1 v_res; | FLOAT_V_T_M1 v_res; | ||||
| v_res = VFMVVF_FLOAT_M1(0, 1); | v_res = VFMVVF_FLOAT_M1(0, 1); | ||||
| MASK_T mask0, mask1; | |||||
| BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2; | BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2; | ||||
| gvl = VSETVL(n); | gvl = VSETVL(n); | ||||
| v_max = VFMVVF_FLOAT(0, gvl); | v_max = VFMVVF_FLOAT(0, gvl); | ||||
| @@ -83,7 +83,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| FLOAT_V_T_M1 v_res; | FLOAT_V_T_M1 v_res; | ||||
| v_res = VFMVVF_FLOAT_M1(FLT_MAX, 1); | v_res = VFMVVF_FLOAT_M1(FLT_MAX, 1); | ||||
| MASK_T mask0, mask1; | |||||
| BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2; | BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2; | ||||
| gvl = VSETVL(n); | gvl = VSETVL(n); | ||||
| v_min = VFMVVF_FLOAT(FLT_MAX, gvl); | v_min = VFMVVF_FLOAT(FLT_MAX, gvl); | ||||
| @@ -76,14 +76,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| FLOAT asumf=0.0; | FLOAT asumf=0.0; | ||||
| if (n <= 0 || inc_x <= 0) return(asumf); | if (n <= 0 || inc_x <= 0) return(asumf); | ||||
| unsigned int gvl = 0; | unsigned int gvl = 0; | ||||
| FLOAT_V_T v0, v1, v_zero,v_sum; | |||||
| FLOAT_V_T v0, v1,v_sum; | |||||
| FLOAT_V_T_M1 v_res; | FLOAT_V_T_M1 v_res; | ||||
| v_res = VFMVVF_FLOAT_M1(0, 1); | v_res = VFMVVF_FLOAT_M1(0, 1); | ||||
| if(inc_x == 1){ | if(inc_x == 1){ | ||||
| BLASLONG n2 = n * 2; | BLASLONG n2 = n * 2; | ||||
| gvl = VSETVL(n2); | gvl = VSETVL(n2); | ||||
| v_zero = VFMVVF_FLOAT(0, gvl); | |||||
| if(gvl <= n2/2){ | if(gvl <= n2/2){ | ||||
| v_sum = VFMVVF_FLOAT(0, gvl); | v_sum = VFMVVF_FLOAT(0, gvl); | ||||
| for(i=0,j=0; i<n2/(gvl*2); i++){ | for(i=0,j=0; i<n2/(gvl*2); i++){ | ||||
| @@ -108,7 +107,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| }else{ | }else{ | ||||
| gvl = VSETVL(n); | gvl = VSETVL(n); | ||||
| unsigned int stride_x = inc_x * sizeof(FLOAT) * 2; | unsigned int stride_x = inc_x * sizeof(FLOAT) * 2; | ||||
| v_zero = VFMVVF_FLOAT(0, gvl); | |||||
| BLASLONG inc_xv = inc_x * 2 * gvl; | BLASLONG inc_xv = inc_x * 2 * gvl; | ||||
| v_sum = VFMVVF_FLOAT(0, gvl); | v_sum = VFMVVF_FLOAT(0, gvl); | ||||
| @@ -55,8 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, | |||||
| BLASLONG i = 0, j = 0, k = 0; | BLASLONG i = 0, j = 0, k = 0; | ||||
| BLASLONG ix = 0, iy = 0; | BLASLONG ix = 0, iy = 0; | ||||
| FLOAT *a_ptr = a; | FLOAT *a_ptr = a; | ||||
| FLOAT temp_r = 0.0, temp_i = 0.0, temp_r1, temp_i1, temp_r2, temp_i2, temp_r3, temp_i3, temp_rr[4], temp_ii[4]; | |||||
| FLOAT_V_T va0, va1, vy0, vy1, vy0_new, vy1_new, va2, va3, va4, va5, va6, va7, temp_iv, temp_rv, x_v0, x_v1, temp_v1, temp_v2, temp_v3, temp_v4; | |||||
| FLOAT temp_r = 0.0, temp_i = 0.0, temp_rr[4], temp_ii[4]; | |||||
| FLOAT_V_T va0, va1, vy0, vy1, vy0_new, vy1_new, va2, va3, va4, va5, va6, va7, temp_iv, temp_rv, x_v0, x_v1; | |||||
| unsigned int gvl = 0; | unsigned int gvl = 0; | ||||
| BLASLONG stride_a = sizeof(FLOAT) * 2; | BLASLONG stride_a = sizeof(FLOAT) * 2; | ||||
| BLASLONG stride_y = inc_y * sizeof(FLOAT) * 2; | BLASLONG stride_y = inc_y * sizeof(FLOAT) * 2; | ||||
| @@ -71,14 +71,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| FLOAT asumf=0.0; | FLOAT asumf=0.0; | ||||
| if (n <= 0 || inc_x <= 0) return(asumf); | if (n <= 0 || inc_x <= 0) return(asumf); | ||||
| unsigned int gvl = 0; | unsigned int gvl = 0; | ||||
| FLOAT_V_T v0, v1, v_zero,v_sum; | |||||
| FLOAT_V_T v0, v1,v_sum; | |||||
| FLOAT_V_T_M1 v_res; | FLOAT_V_T_M1 v_res; | ||||
| v_res = VFMVVF_FLOAT_M1(0, 1); | v_res = VFMVVF_FLOAT_M1(0, 1); | ||||
| if(inc_x == 1){ | if(inc_x == 1){ | ||||
| BLASLONG n2 = n * 2; | BLASLONG n2 = n * 2; | ||||
| gvl = VSETVL(n2); | gvl = VSETVL(n2); | ||||
| v_zero = VFMVVF_FLOAT(0, gvl); | |||||
| if(gvl <= n2/2){ | if(gvl <= n2/2){ | ||||
| v_sum = VFMVVF_FLOAT(0, gvl); | v_sum = VFMVVF_FLOAT(0, gvl); | ||||
| for(i=0,j=0; i<n2/(gvl*2); i++){ | for(i=0,j=0; i<n2/(gvl*2); i++){ | ||||
| @@ -100,7 +99,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| }else{ | }else{ | ||||
| gvl = VSETVL(n); | gvl = VSETVL(n); | ||||
| unsigned int stride_x = inc_x * sizeof(FLOAT) * 2; | unsigned int stride_x = inc_x * sizeof(FLOAT) * 2; | ||||
| v_zero = VFMVVF_FLOAT(0, gvl); | |||||
| BLASLONG inc_xv = inc_x * 2 * gvl; | BLASLONG inc_xv = inc_x * 2 * gvl; | ||||
| v_sum = VFMVVF_FLOAT(0, gvl); | v_sum = VFMVVF_FLOAT(0, gvl); | ||||