| @@ -0,0 +1,145 @@ | |||||
| /*********************************************************************/ | |||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
| /* All rights reserved. */ | |||||
| /* */ | |||||
| /* Redistribution and use in source and binary forms, with or */ | |||||
| /* without modification, are permitted provided that the following */ | |||||
| /* conditions are met: */ | |||||
| /* */ | |||||
| /* 1. Redistributions of source code must retain the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer. */ | |||||
| /* */ | |||||
| /* 2. Redistributions in binary form must reproduce the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer in the documentation and/or other materials */ | |||||
| /* provided with the distribution. */ | |||||
| /* */ | |||||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||||
| /* */ | |||||
| /* The views and conclusions contained in the software and */ | |||||
| /* documentation are those of the authors and should not be */ | |||||
| /* interpreted as representing official policies, either expressed */ | |||||
| /* or implied, of The University of Texas at Austin. */ | |||||
| /*********************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include "common.h" | |||||
| #ifdef __ARM_FEATURE_SVE | |||||
| #include <arm_sve.h> | |||||
| #endif | |||||
| int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, FLOAT *b){ | |||||
| BLASLONG i, js; | |||||
| BLASLONG X; | |||||
| lda += lda; | |||||
| js = 0; | |||||
| FLOAT *ao; | |||||
| #ifdef DOUBLE | |||||
| svint64_t index = svindex_s64(0LL, lda*2); | |||||
| svbool_t pn = svwhilelt_b64(js, n); | |||||
| int n_active = svcntp_b64(svptrue_b64(), pn); | |||||
| #else | |||||
| svint32_t index = svindex_s32(0, lda*2); | |||||
| svbool_t pn = svwhilelt_b32(js, n); | |||||
| int n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| #endif | |||||
| do | |||||
| { | |||||
| X = posX; | |||||
| if (posX <= posY) { | |||||
| ao = a + posY * 2 + posX * lda; | |||||
| } else { | |||||
| ao = a + posX * 2 + posY * lda; | |||||
| } | |||||
| i = 0; | |||||
| do | |||||
| { | |||||
| if (X > posY) { | |||||
| #ifdef DOUBLE | |||||
| svfloat64_t aj_vec_real = svld1_gather_index(pn, ao, index); | |||||
| svfloat64_t aj_vec_imag = svld1_gather_index(pn, ao+1, index); | |||||
| #else | |||||
| svfloat32_t aj_vec_real = svld1_gather_index(pn, ao, index); | |||||
| svfloat32_t aj_vec_imag = svld1_gather_index(pn, ao+1, index); | |||||
| #endif | |||||
| svst2(pn, b, svcreate2(aj_vec_real, aj_vec_imag)); | |||||
| ao += 2; | |||||
| b += n_active * 2; | |||||
| X ++; | |||||
| i ++; | |||||
| } else | |||||
| if (X < posY) { | |||||
| ao += lda * 2; | |||||
| b += n_active * 2; | |||||
| X ++; | |||||
| i ++; | |||||
| } else { | |||||
| /* I did not find a way to unroll this while preserving vector-length-agnostic code. */ | |||||
| #ifdef UNIT | |||||
| int temp = 0; | |||||
| for (int j = 0; j < n_active; j++) { | |||||
| for (int k = 0 ; k < j; k++) { | |||||
| b[temp++] = *(ao+k*lda+j); | |||||
| b[temp++] = *(ao+k*lda+j+1); | |||||
| } | |||||
| b[temp++] = ONE; | |||||
| b[temp++] = ZERO; | |||||
| for (int k = j+1; k < n_active; k++) { | |||||
| b[temp++] = ZERO; | |||||
| b[temp++] = ZERO; | |||||
| } | |||||
| } | |||||
| #else | |||||
| int temp = 0; | |||||
| for (int j = 0; j < n_active; j++) { | |||||
| for (int k = 0 ; k <= j; k++) { | |||||
| b[temp++] = *(ao+k*lda+j); | |||||
| b[temp++] = *(ao+k*lda+j+1); | |||||
| } | |||||
| for (int k = j+1; k < n_active; k++) { | |||||
| b[temp++] = ZERO; | |||||
| b[temp++] = ZERO; | |||||
| } | |||||
| } | |||||
| #endif | |||||
| ao += n_active * 2; | |||||
| b += n_active*n_active * 2; | |||||
| X += n_active; | |||||
| i += n_active; | |||||
| } | |||||
| } while (i < m); | |||||
| posY += n_active; | |||||
| js += n_active; | |||||
| #ifdef DOUBLE | |||||
| pn = svwhilelt_b64(js, n); | |||||
| n_active = svcntp_b64(svptrue_b64(), pn); | |||||
| } while (svptest_any(svptrue_b64(), pn)); | |||||
| #else | |||||
| pn = svwhilelt_b32(js, n); | |||||
| n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| } while (svptest_any(svptrue_b32(), pn)); | |||||
| #endif | |||||
| return 0; | |||||
| } | |||||
| @@ -0,0 +1,143 @@ | |||||
| /*********************************************************************/ | |||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
| /* All rights reserved. */ | |||||
| /* */ | |||||
| /* Redistribution and use in source and binary forms, with or */ | |||||
| /* without modification, are permitted provided that the following */ | |||||
| /* conditions are met: */ | |||||
| /* */ | |||||
| /* 1. Redistributions of source code must retain the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer. */ | |||||
| /* */ | |||||
| /* 2. Redistributions in binary form must reproduce the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer in the documentation and/or other materials */ | |||||
| /* provided with the distribution. */ | |||||
| /* */ | |||||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||||
| /* */ | |||||
| /* The views and conclusions contained in the software and */ | |||||
| /* documentation are those of the authors and should not be */ | |||||
| /* interpreted as representing official policies, either expressed */ | |||||
| /* or implied, of The University of Texas at Austin. */ | |||||
| /*********************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include "common.h" | |||||
| #ifdef __ARM_FEATURE_SVE | |||||
| #include <arm_sve.h> | |||||
| #endif | |||||
| int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, FLOAT *b){ | |||||
| BLASLONG i, js; | |||||
| BLASLONG X; | |||||
| lda += lda; | |||||
| FLOAT *ao; | |||||
| js = 0; | |||||
| #ifdef DOUBLE | |||||
| svbool_t pn = svwhilelt_b64(js, n); | |||||
| int n_active = svcntp_b64(svptrue_b64(), pn); | |||||
| #else | |||||
| svbool_t pn = svwhilelt_b32(js, n); | |||||
| int n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| #endif | |||||
| do | |||||
| { | |||||
| X = posX; | |||||
| if (posX <= posY) { | |||||
| ao = a + posY * 2 + posX * lda; | |||||
| } else { | |||||
| ao = a + posX * 2 + posY * lda; | |||||
| } | |||||
| i = 0; | |||||
| do | |||||
| { | |||||
| if (X > posY) { | |||||
| ao += 2; | |||||
| b += n_active * 2; | |||||
| X ++; | |||||
| i ++; | |||||
| } else | |||||
| if (X < posY) { | |||||
| #ifdef DOUBLE | |||||
| svfloat64x2_t aj_vec = svld2(pn, ao); | |||||
| #else | |||||
| svfloat32x2_t aj_vec = svld2(pn, ao); | |||||
| #endif | |||||
| svst2(pn, b, aj_vec); | |||||
| ao += lda * 2; | |||||
| b += n_active * 2; | |||||
| X ++; | |||||
| i ++; | |||||
| } else { | |||||
| /* I did not find a way to unroll this while preserving vector-length-agnostic code. */ | |||||
| #ifdef UNIT | |||||
| int temp = 0; | |||||
| for (int j = 0; j < n_active; j++) { | |||||
| for (int k = 0 ; k < j; k++) { | |||||
| b[temp++] = ZERO; | |||||
| b[temp++] = ZERO; | |||||
| } | |||||
| b[temp++] = ONE; | |||||
| b[temp++] = ZERO; | |||||
| for (int k = j+1; k < n_active; k++) { | |||||
| b[temp++] = *(ao+j*lda+k); | |||||
| b[temp++] = *(ao+j*lda+k+1); | |||||
| } | |||||
| } | |||||
| #else | |||||
| int temp = 0; | |||||
| for (int j = 0; j < n_active; j++) { | |||||
| for (int k = 0 ; k < j; k++) { | |||||
| b[temp++] = ZERO; | |||||
| b[temp++] = ZERO; | |||||
| } | |||||
| for (int k = j; k < n_active; k++) { | |||||
| b[temp++] = *(ao+j*lda+k); | |||||
| b[temp++] = *(ao+j*lda+k+1); | |||||
| } | |||||
| } | |||||
| #endif | |||||
| ao += n_active * lda * 2; | |||||
| b += n_active*n_active * 2; | |||||
| X += n_active; | |||||
| i += n_active; | |||||
| } | |||||
| } while (i < m); | |||||
| posY += n_active; | |||||
| js += n_active; | |||||
| #ifdef DOUBLE | |||||
| pn = svwhilelt_b64(js, n); | |||||
| n_active = svcntp_b64(svptrue_b64(), pn); | |||||
| } while (svptest_any(svptrue_b64(), pn)); | |||||
| #else | |||||
| pn = svwhilelt_b32(js, n); | |||||
| n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| } while (svptest_any(svptrue_b32(), pn)); | |||||
| #endif | |||||
| return 0; | |||||
| } | |||||
| @@ -0,0 +1,145 @@ | |||||
| /*********************************************************************/ | |||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
| /* All rights reserved. */ | |||||
| /* */ | |||||
| /* Redistribution and use in source and binary forms, with or */ | |||||
| /* without modification, are permitted provided that the following */ | |||||
| /* conditions are met: */ | |||||
| /* */ | |||||
| /* 1. Redistributions of source code must retain the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer. */ | |||||
| /* */ | |||||
| /* 2. Redistributions in binary form must reproduce the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer in the documentation and/or other materials */ | |||||
| /* provided with the distribution. */ | |||||
| /* */ | |||||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||||
| /* */ | |||||
| /* The views and conclusions contained in the software and */ | |||||
| /* documentation are those of the authors and should not be */ | |||||
| /* interpreted as representing official policies, either expressed */ | |||||
| /* or implied, of The University of Texas at Austin. */ | |||||
| /*********************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include "common.h" | |||||
| #ifdef __ARM_FEATURE_SVE | |||||
| #include <arm_sve.h> | |||||
| #endif | |||||
| int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, FLOAT *b){ | |||||
| BLASLONG i, js; | |||||
| BLASLONG X; | |||||
| lda += lda; | |||||
| js = 0; | |||||
| FLOAT *ao; | |||||
| #ifdef DOUBLE | |||||
| svint64_t index = svindex_s64(0LL, lda * 2); | |||||
| svbool_t pn = svwhilelt_b64(js, n); | |||||
| int n_active = svcntp_b64(svptrue_b64(), pn); | |||||
| #else | |||||
| svint32_t index = svindex_s32(0, lda * 2); | |||||
| svbool_t pn = svwhilelt_b32(js, n); | |||||
| int n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| #endif | |||||
| do | |||||
| { | |||||
| X = posX; | |||||
| if (posX <= posY) { | |||||
| ao = a + posX * 2 + posY * lda; | |||||
| } else { | |||||
| ao = a + posY * 2 + posX * lda; | |||||
| } | |||||
| i = 0; | |||||
| do | |||||
| { | |||||
| if (X < posY) { | |||||
| #ifdef DOUBLE | |||||
| svfloat64_t aj_vec_real = svld1_gather_index(pn, ao, index); | |||||
| svfloat64_t aj_vec_imag = svld1_gather_index(pn, ao+1, index); | |||||
| #else | |||||
| svfloat32_t aj_vec_real = svld1_gather_index(pn, ao, index); | |||||
| svfloat32_t aj_vec_imag = svld1_gather_index(pn, ao+1, index); | |||||
| #endif | |||||
| svst2(pn, b, svcreate2(aj_vec_real, aj_vec_imag)); | |||||
| ao += 2; | |||||
| b += n_active * 2; | |||||
| X ++; | |||||
| i ++; | |||||
| } else | |||||
| if (X > posY) { | |||||
| ao += lda * 2; | |||||
| b += n_active * 2; | |||||
| X ++; | |||||
| i ++; | |||||
| } else { | |||||
| /* I did not find a way to unroll this while preserving vector-length-agnostic code. */ | |||||
| #ifdef UNIT | |||||
| int temp = 0; | |||||
| for (int j = 0; j < n_active; j++) { | |||||
| for (int k = 0 ; k < j; k++) { | |||||
| b[temp++] = ZERO; | |||||
| b[temp++] = ZERO; | |||||
| } | |||||
| b[temp++] = ONE; | |||||
| b[temp++] = ZERO; | |||||
| for (int k = j+1; k < n_active; k++) { | |||||
| b[temp++] = *(ao+k*lda+j); | |||||
| b[temp++] = *(ao+k*lda+j+1); | |||||
| } | |||||
| } | |||||
| #else | |||||
| int temp = 0; | |||||
| for (int j = 0; j < n_active; j++) { | |||||
| for (int k = 0 ; k < j; k++) { | |||||
| b[temp++] = ZERO; | |||||
| b[temp++] = ZERO; | |||||
| } | |||||
| for (int k = j; k < n_active; k++) { | |||||
| b[temp++] = *(ao+k*lda+j); | |||||
| b[temp++] = *(ao+k*lda+j+1); | |||||
| } | |||||
| } | |||||
| #endif | |||||
| ao += n_active * 2; | |||||
| b += n_active*n_active * 2; | |||||
| X += n_active; | |||||
| i += n_active; | |||||
| } | |||||
| } while (i < m); | |||||
| posY += n_active; | |||||
| js += n_active; | |||||
| #ifdef DOUBLE | |||||
| pn = svwhilelt_b64(js, n); | |||||
| n_active = svcntp_b64(svptrue_b64(), pn); | |||||
| } while (svptest_any(svptrue_b64(), pn)); | |||||
| #else | |||||
| pn = svwhilelt_b32(js, n); | |||||
| n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| } while (svptest_any(svptrue_b32(), pn)); | |||||
| #endif | |||||
| return 0; | |||||
| } | |||||
| @@ -0,0 +1,141 @@ | |||||
| /*********************************************************************/ | |||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
| /* All rights reserved. */ | |||||
| /* */ | |||||
| /* Redistribution and use in source and binary forms, with or */ | |||||
| /* without modification, are permitted provided that the following */ | |||||
| /* conditions are met: */ | |||||
| /* */ | |||||
| /* 1. Redistributions of source code must retain the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer. */ | |||||
| /* */ | |||||
| /* 2. Redistributions in binary form must reproduce the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer in the documentation and/or other materials */ | |||||
| /* provided with the distribution. */ | |||||
| /* */ | |||||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||||
| /* */ | |||||
| /* The views and conclusions contained in the software and */ | |||||
| /* documentation are those of the authors and should not be */ | |||||
| /* interpreted as representing official policies, either expressed */ | |||||
| /* or implied, of The University of Texas at Austin. */ | |||||
| /*********************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include "common.h" | |||||
| #ifdef __ARM_FEATURE_SVE | |||||
| #include <arm_sve.h> | |||||
| #endif | |||||
| int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, FLOAT *b){ | |||||
| BLASLONG i, js; | |||||
| BLASLONG X; | |||||
| lda += lda; | |||||
| FLOAT *ao; | |||||
| js = 0; | |||||
| #ifdef DOUBLE | |||||
| svbool_t pn = svwhilelt_b64(js, n); | |||||
| int n_active = svcntp_b64(svptrue_b64(), pn); | |||||
| #else | |||||
| svbool_t pn = svwhilelt_b32(js, n); | |||||
| int n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| #endif | |||||
| do | |||||
| { | |||||
| X = posX; | |||||
| if (posX <= posY) { | |||||
| ao = a + posX * 2 + posY * lda; | |||||
| } else { | |||||
| ao = a + posY * 2 + posX * lda; | |||||
| } | |||||
| i = 0; | |||||
| do | |||||
| { | |||||
| if (X < posY) { | |||||
| ao += 2; | |||||
| b += n_active * 2; | |||||
| X ++; | |||||
| i ++; | |||||
| } else | |||||
| if (X > posY) { | |||||
| #ifdef DOUBLE | |||||
| svfloat64x2_t aj_vec = svld2(pn, ao); | |||||
| #else | |||||
| svfloat32x2_t aj_vec = svld2(pn, ao); | |||||
| #endif | |||||
| svst2(pn, b, aj_vec); | |||||
| ao += lda * 2; | |||||
| b += n_active * 2; | |||||
| X ++; | |||||
| i ++; | |||||
| } else { | |||||
| /* I did not find a way to unroll this while preserving vector-length-agnostic code. */ | |||||
| #ifdef UNIT | |||||
| int temp = 0; | |||||
| for (int j = 0; j < n_active; j++) { | |||||
| for (int k = 0 ; k < j; k++) { | |||||
| b[temp++] = *(ao+j*lda+k); | |||||
| b[temp++] = *(ao+j*lda+k+1); | |||||
| } | |||||
| b[temp++] = ONE; | |||||
| b[temp++] = ZERO; | |||||
| for (int k = j+1; k < n_active; k++) { | |||||
| b[temp++] = ZERO; | |||||
| b[temp++] = ZERO; | |||||
| } | |||||
| } | |||||
| #else | |||||
| int temp = 0; | |||||
| for (int j = 0; j < n_active; j++) { | |||||
| for (int k = 0 ; k <= j; k++) { | |||||
| b[temp++] = *(ao+j*lda+k); | |||||
| b[temp++] = *(ao+j*lda+k+1); | |||||
| } | |||||
| for (int k = j+1; k < n_active; k++) { | |||||
| b[temp++] = ZERO; | |||||
| b[temp++] = ZERO; | |||||
| } | |||||
| } | |||||
| #endif | |||||
| ao += n_active * lda * 2; | |||||
| b += n_active*n_active * 2; | |||||
| X += n_active; | |||||
| i += n_active; | |||||
| } | |||||
| } while (i < m); | |||||
| posY += n_active; | |||||
| js += n_active; | |||||
| #ifdef DOUBLE | |||||
| pn = svwhilelt_b64(js, n); | |||||
| n_active = svcntp_b64(svptrue_b64(), pn); | |||||
| } while (svptest_any(svptrue_b64(), pn)); | |||||
| #else | |||||
| pn = svwhilelt_b32(js, n); | |||||
| n_active = svcntp_b32(svptrue_b32(), pn); | |||||
| } while (svptest_any(svptrue_b32(), pn)); | |||||
| #endif | |||||
| return 0; | |||||
| } | |||||