From 98b5ef929cfc98f2f3c236966830276c255118d2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 12 Feb 2025 09:04:22 +0100 Subject: [PATCH] Restore the non-vectorized code from before PR4880 for POWER8 --- kernel/power/sgemv_t.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/kernel/power/sgemv_t.c b/kernel/power/sgemv_t.c index e133c815c..ed0a24230 100644 --- a/kernel/power/sgemv_t.c +++ b/kernel/power/sgemv_t.c @@ -78,7 +78,17 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA temp7 += v_x[i] * va7[i]; } - + #if defined(POWER8) + y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]); + y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]); + y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]); + y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]); + + y[4] += alpha * (temp4[0] + temp4[1]+temp4[2] + temp4[3]); + y[5] += alpha * (temp5[0] + temp5[1]+temp5[2] + temp5[3]); + y[6] += alpha * (temp6[0] + temp6[1]+temp6[2] + temp6[3]); + y[7] += alpha * (temp7[0] + temp7[1]+temp7[2] + temp7[3]); + #else register __vector float t0, t1, t2, t3; register __vector float a = { alpha, alpha, alpha, alpha }; __vector float *v_y = (__vector float*) y; @@ -105,7 +115,7 @@ static void sgemv_kernel_4x8(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA v_y[0] += a * temp0; v_y[1] += a * temp4; - +#endif } @@ -132,7 +142,12 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA temp2 += v_x[i] * va2[i]; temp3 += v_x[i] * va3[i]; } - + #if defined(POWER8) + y[0] += alpha * (temp0[0] + temp0[1]+temp0[2] + temp0[3]); + y[1] += alpha * (temp1[0] + temp1[1]+temp1[2] + temp1[3]); + y[2] += alpha * (temp2[0] + temp2[1]+temp2[2] + temp2[3]); + y[3] += alpha * (temp3[0] + temp3[1]+temp3[2] + temp3[3]); + #else register __vector float t0, t1, t2, t3; register __vector float a = { alpha, alpha, alpha, alpha }; __vector float *v_y = (__vector float*) y; @@ -148,7 +163,7 @@ static void sgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA temp0 += temp1 + temp2 + temp3; v_y[0] += a * temp0; - +#endif }