| @@ -48,12 +48,17 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| BLASLONG i, js; | BLASLONG i, js; | ||||
| BLASLONG X; | BLASLONG X; | ||||
| svint64_t index = svindex_s64(0LL, lda); | |||||
| FLOAT *ao; | |||||
| js = 0; | js = 0; | ||||
| FLOAT *ao; | |||||
| #ifdef DOUBLE | |||||
| svint64_t index = svindex_s64(0LL, lda); | |||||
| svbool_t pn = svwhilelt_b64(js, n); | svbool_t pn = svwhilelt_b64(js, n); | ||||
| int n_active = svcntp_b64(svptrue_b64(), pn); | int n_active = svcntp_b64(svptrue_b64(), pn); | ||||
| #else | |||||
| svint32_t index = svindex_s32(0, lda); | |||||
| svbool_t pn = svwhilelt_b32(js, n); | |||||
| int n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| #endif | |||||
| do | do | ||||
| { | { | ||||
| X = posX; | X = posX; | ||||
| @@ -68,7 +73,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| do | do | ||||
| { | { | ||||
| if (X > posY) { | if (X > posY) { | ||||
| #ifdef DOUBLE | |||||
| svfloat64_t aj_vec = svld1_gather_index(pn, ao, index); | svfloat64_t aj_vec = svld1_gather_index(pn, ao, index); | ||||
| #else | |||||
| svfloat32_t aj_vec = svld1_gather_index(pn, ao, index); | |||||
| #endif | |||||
| svst1(pn, b, aj_vec); | svst1(pn, b, aj_vec); | ||||
| ao ++; | ao ++; | ||||
| b += n_active; | b += n_active; | ||||
| @@ -113,9 +122,15 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| posY += n_active; | posY += n_active; | ||||
| js += n_active; | js += n_active; | ||||
| #ifdef DOUBLE | |||||
| pn = svwhilelt_b64(js, n); | pn = svwhilelt_b64(js, n); | ||||
| n_active = svcntp_b64(svptrue_b64(), pn); | n_active = svcntp_b64(svptrue_b64(), pn); | ||||
| } while (svptest_any(svptrue_b64(), pn)); | } while (svptest_any(svptrue_b64(), pn)); | ||||
| #else | |||||
| pn = svwhilelt_b32(js, n); | |||||
| n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| } while (svptest_any(svptrue_b32(), pn)); | |||||
| #endif | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -50,8 +50,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| FLOAT *ao; | FLOAT *ao; | ||||
| js = 0; | js = 0; | ||||
| #ifdef DOUBLE | |||||
| svbool_t pn = svwhilelt_b64(js, n); | svbool_t pn = svwhilelt_b64(js, n); | ||||
| int n_active = svcntp_b64(svptrue_b64(), pn); | int n_active = svcntp_b64(svptrue_b64(), pn); | ||||
| #else | |||||
| svbool_t pn = svwhilelt_b32(js, n); | |||||
| int n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| #endif | |||||
| do | do | ||||
| { | { | ||||
| X = posX; | X = posX; | ||||
| @@ -72,7 +77,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| i ++; | i ++; | ||||
| } else | } else | ||||
| if (X < posY) { | if (X < posY) { | ||||
| #ifdef DOUBLE | |||||
| svfloat64_t aj_vec = svld1(pn, ao); | svfloat64_t aj_vec = svld1(pn, ao); | ||||
| #else | |||||
| svfloat32_t aj_vec = svld1(pn, ao); | |||||
| #endif | |||||
| svst1(pn, b, aj_vec); | svst1(pn, b, aj_vec); | ||||
| ao += lda; | ao += lda; | ||||
| b += n_active; | b += n_active; | ||||
| @@ -112,9 +121,15 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| posY += n_active; | posY += n_active; | ||||
| js += n_active; | js += n_active; | ||||
| #ifdef DOUBLE | |||||
| pn = svwhilelt_b64(js, n); | pn = svwhilelt_b64(js, n); | ||||
| n_active = svcntp_b64(svptrue_b64(), pn); | n_active = svcntp_b64(svptrue_b64(), pn); | ||||
| } while (svptest_any(svptrue_b64(), pn)); | } while (svptest_any(svptrue_b64(), pn)); | ||||
| #else | |||||
| pn = svwhilelt_b32(js, n); | |||||
| n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| } while (svptest_any(svptrue_b32(), pn)); | |||||
| #endif | |||||
| return 0; | return 0; | ||||
| @@ -48,12 +48,17 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| BLASLONG i, js; | BLASLONG i, js; | ||||
| BLASLONG X; | BLASLONG X; | ||||
| svint64_t index = svindex_s64(0LL, lda); | |||||
| FLOAT *ao; | |||||
| js = 0; | js = 0; | ||||
| FLOAT *ao; | |||||
| #ifdef DOUBLE | |||||
| svint64_t index = svindex_s64(0LL, lda); | |||||
| svbool_t pn = svwhilelt_b64(js, n); | svbool_t pn = svwhilelt_b64(js, n); | ||||
| int n_active = svcntp_b64(svptrue_b64(), pn); | int n_active = svcntp_b64(svptrue_b64(), pn); | ||||
| #else | |||||
| svint32_t index = svindex_s32(0, lda); | |||||
| svbool_t pn = svwhilelt_b32(js, n); | |||||
| int n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| #endif | |||||
| do | do | ||||
| { | { | ||||
| X = posX; | X = posX; | ||||
| @@ -68,7 +73,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| do | do | ||||
| { | { | ||||
| if (X < posY) { | if (X < posY) { | ||||
| #ifdef DOUBLE | |||||
| svfloat64_t aj_vec = svld1_gather_index(pn, ao, index); | svfloat64_t aj_vec = svld1_gather_index(pn, ao, index); | ||||
| #else | |||||
| svfloat32_t aj_vec = svld1_gather_index(pn, ao, index); | |||||
| #endif | |||||
| svst1(pn, b, aj_vec); | svst1(pn, b, aj_vec); | ||||
| ao ++; | ao ++; | ||||
| b += n_active; | b += n_active; | ||||
| @@ -113,9 +122,15 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| posY += n_active; | posY += n_active; | ||||
| js += n_active; | js += n_active; | ||||
| #ifdef DOUBLE | |||||
| pn = svwhilelt_b64(js, n); | pn = svwhilelt_b64(js, n); | ||||
| n_active = svcntp_b64(svptrue_b64(), pn); | n_active = svcntp_b64(svptrue_b64(), pn); | ||||
| } while (svptest_any(svptrue_b64(), pn)); | } while (svptest_any(svptrue_b64(), pn)); | ||||
| #else | |||||
| pn = svwhilelt_b32(js, n); | |||||
| n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| } while (svptest_any(svptrue_b32(), pn)); | |||||
| #endif | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -50,8 +50,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| FLOAT *ao; | FLOAT *ao; | ||||
| js = 0; | js = 0; | ||||
| #ifdef DOUBLE | |||||
| svbool_t pn = svwhilelt_b64(js, n); | svbool_t pn = svwhilelt_b64(js, n); | ||||
| int n_active = svcntp_b64(svptrue_b64(), pn); | int n_active = svcntp_b64(svptrue_b64(), pn); | ||||
| #else | |||||
| svbool_t pn = svwhilelt_b32(js, n); | |||||
| int n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| #endif | |||||
| do | do | ||||
| { | { | ||||
| X = posX; | X = posX; | ||||
| @@ -72,7 +77,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| i ++; | i ++; | ||||
| } else | } else | ||||
| if (X > posY) { | if (X > posY) { | ||||
| #ifdef DOUBLE | |||||
| svfloat64_t aj_vec = svld1(pn, ao); | svfloat64_t aj_vec = svld1(pn, ao); | ||||
| #else | |||||
| svfloat32_t aj_vec = svld1(pn, ao); | |||||
| #endif | |||||
| svst1(pn, b, aj_vec); | svst1(pn, b, aj_vec); | ||||
| ao += lda; | ao += lda; | ||||
| b += n_active; | b += n_active; | ||||
| @@ -111,9 +120,15 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| posY += n_active; | posY += n_active; | ||||
| js += n_active; | js += n_active; | ||||
| #ifdef DOUBLE | |||||
| pn = svwhilelt_b64(js, n); | pn = svwhilelt_b64(js, n); | ||||
| n_active = svcntp_b64(svptrue_b64(), pn); | n_active = svcntp_b64(svptrue_b64(), pn); | ||||
| } while (svptest_any(svptrue_b64(), pn)); | } while (svptest_any(svptrue_b64(), pn)); | ||||
| #else | |||||
| pn = svwhilelt_b32(js, n); | |||||
| n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| } while (svptest_any(svptrue_b32(), pn)); | |||||
| #endif | |||||
| return 0; | return 0; | ||||
| } | } | ||||