#include "common.h"
#include <arm_sve.h>

/*
 * Precision-dependent SVE helpers: the 64-bit variants are selected when
 * DOUBLE is defined, the 32-bit variants otherwise.
 * NOTE(review): presumably FLOAT (from common.h) is double/float to match --
 * confirm against the build configuration.
 */
#ifdef DOUBLE
#define SVE_TYPE svfloat64_t
#define SVE_ZERO svdup_f64(0.0)
#define SVE_WHILELT svwhilelt_b64
#define SVE_ALL svptrue_b64()
#define SVE_WIDTH svcntd()
#else
#define SVE_TYPE svfloat32_t
#define SVE_ZERO svdup_f32(0.0)
#define SVE_WHILELT svwhilelt_b32
#define SVE_ALL svptrue_b32()
#define SVE_WIDTH svcntw()
#endif

/*
 * Returns sum over j in [0, n) of x[i*lda + j] * y[j*incx].
 *
 *   i    - index of the lda-strided slice of x to read
 *   x    - base pointer; elements i*lda .. i*lda + n-1 are read contiguously
 *   lda  - distance, in elements, between consecutive slices of x
 *   y    - second operand, read with a stride of incx elements
 *   incx - element stride applied to y
 *   n    - number of element pairs to accumulate; n <= 0 yields 0
 *
 * When incx == 1 both operands are contiguous and the sum is computed with
 * two independent SVE accumulators (two vectors per iteration).  For any
 * other stride a contiguous vector load of y would fetch y[j*incx + k]
 * instead of y[(j + k)*incx], so a scalar loop is used instead.
 */
static FLOAT dgemv_kernel_sve(BLASLONG i, FLOAT *x, BLASLONG lda, FLOAT *y, BLASLONG incx, BLASLONG n)
{
    const FLOAT *row = &x[i * lda];

    if (incx != 1) {
        /* Strided y: contiguous SVE loads cannot be used, go scalar. */
        FLOAT sum = 0.0;
        for (BLASLONG j = 0; j < n; j++) {
            sum += row[j] * y[j * incx];
        }
        return sum;
    }

    SVE_TYPE acc_a = SVE_ZERO;
    SVE_TYPE acc_b = SVE_ZERO;
    BLASLONG sve_width = SVE_WIDTH;

    for (BLASLONG j = 0; j < n; j += sve_width * 2) {
        /*
         * Predicates enable only lanes whose index is < n, so the tail needs
         * no separate loop; pg_b is all-false when the second vector lies
         * entirely past the end.
         */
        svbool_t pg_a = SVE_WHILELT(j, n);
        svbool_t pg_b = SVE_WHILELT(j + sve_width, n);

        SVE_TYPE x_vec_a = svld1(pg_a, &row[j]);
        SVE_TYPE y_vec_a = svld1(pg_a, &y[j]);
        SVE_TYPE x_vec_b = svld1(pg_b, &row[j + sve_width]);
        SVE_TYPE y_vec_b = svld1(pg_b, &y[j + sve_width]);

        /* Merging form: inactive lanes keep their previous accumulator value. */
        acc_a = svmla_m(pg_a, acc_a, x_vec_a, y_vec_a);
        acc_b = svmla_m(pg_b, acc_b, x_vec_b, y_vec_b);
    }

    /* Horizontal reduction of every lane of both accumulators. */
    return svaddv(SVE_ALL, acc_a) + svaddv(SVE_ALL, acc_b);
}