kernel/riscv64: Added support for omatcopy on RISCV64_ZVL256B (tags/v0.3.30)
| @@ -83,9 +83,39 @@ jobs: | |||
| - name: test | |||
| run: | | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH | |||
| qemu-riscv64 ./utest/openblas_utest | |||
| qemu-riscv64 ./utest/openblas_utest_ext | |||
# Run a command under `timeout`, retrying only when an attempt timed out.
#
# $1: the command line as one string. It is word-split on whitespace before
#     execution, so arguments that contain spaces are not supported.
#
# The first attempt is limited to 10 seconds; every timed-out attempt adds
# 5 seconds to the limit, for at most 10 attempts. Returns 0 as soon as an
# attempt succeeds. Any failure that is not a timeout, or running out of
# attempts, terminates the whole script with the failing exit status.
run_with_retry() {
  local cmd="$1"
  local time_out=10
  local retries=10
  # Signal sent by `timeout` on expiry. Because --preserve-status is used,
  # a timed-out attempt is recognised by the status 128 + signal number.
  local timeout_signal=12
  local timeout_status=$((128 + timeout_signal))
  local exit_code=0
  local i

  for ((i = 1; i <= retries; i++)); do
    # $cmd is deliberately unquoted so it splits into program + arguments.
    if timeout -s "$timeout_signal" --preserve-status "$time_out" $cmd; then
      echo "Command succeeded on attempt $i."
      return 0
    else
      # Must be read inside the else branch: after `fi` the status of an
      # `if` whose condition failed and that has no else would be 0.
      exit_code=$?
      if [ "$exit_code" -eq "$timeout_status" ]; then
        echo "Attempt $i timed out (retrying...)"
        time_out=$((time_out + 5))
      else
        echo "Attempt $i failed with exit code $exit_code. Aborting workflow."
        exit "$exit_code"
      fi
    fi
  done

  # Only reachable when every attempt ended in a timeout.
  echo "All $retries attempts failed, giving up."
  echo "Final failure was due to timeout."
  echo "Aborting workflow."
  exit "$exit_code"
}
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
| which qemu-riscv64 | |||
| export QEMU_BIN=$(which qemu-riscv64) | |||
| run_with_retry "$QEMU_BIN ./utest/openblas_utest" | |||
| run_with_retry "$QEMU_BIN ./utest/openblas_utest_ext" | |||
| OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1 | |||
| @@ -201,3 +201,9 @@ endif | |||
| ifndef ZGEMM_BETA | |||
| ZGEMM_BETA = ../generic/zgemm_beta.c | |||
| endif | |||
# Kernel sources for the ?OMATCOPY_CN routines. The S and D variants are
# built from one shared file, the C and Z variants from another.
SOMATCOPY_CN = omatcopy_cn_vector.c
DOMATCOPY_CN = omatcopy_cn_vector.c
COMATCOPY_CN = zomatcopy_cn_vector.c
ZOMATCOPY_CN = zomatcopy_cn_vector.c
| @@ -0,0 +1,123 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
/*
 * Precision-dependent aliases for the RVV intrinsics used by this kernel.
 * All of them operate on m4 register groups; the element width is 64 bits
 * when DOUBLE is defined and 32 bits otherwise.
 */
#if defined(DOUBLE)
#define VSETVL_MAX      RISCV_RVV(vsetvlmax_e64m4)()
#define VSETVL(n)       RISCV_RVV(vsetvl_e64m4)(n)
#define FLOAT_V_T       vfloat64m4_t
#define VLEV_FLOAT      RISCV_RVV(vle64_v_f64m4)
#define VSEV_FLOAT      RISCV_RVV(vse64_v_f64m4)
#define VFMULVF_FLOAT   RISCV_RVV(vfmul_vf_f64m4)
#define VFMVVF_FLOAT    RISCV_RVV(vfmv_v_f_f64m4)
#else
#define VSETVL_MAX      RISCV_RVV(vsetvlmax_e32m4)()
#define VSETVL(n)       RISCV_RVV(vsetvl_e32m4)(n)
#define FLOAT_V_T       vfloat32m4_t
#define VLEV_FLOAT      RISCV_RVV(vle32_v_f32m4)
#define VSEV_FLOAT      RISCV_RVV(vse32_v_f32m4)
#define VFMULVF_FLOAT   RISCV_RVV(vfmul_vf_f32m4)
#define VFMVVF_FLOAT    RISCV_RVV(vfmv_v_f_f32m4)
#endif
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
| BLASLONG i,j; | |||
| FLOAT *aptr,*bptr; | |||
| size_t vl; | |||
| FLOAT_V_T va, vb,va1,vb1; | |||
| if ( rows <= 0 ) return(0); | |||
| if ( cols <= 0 ) return(0); | |||
| aptr = a; | |||
| bptr = b; | |||
| if ( alpha == 0.0 ) | |||
| { | |||
| vl = VSETVL_MAX; | |||
| va = VFMVVF_FLOAT(0, vl); | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| for(j=0; j<rows; j+=vl) | |||
| { | |||
| vl = VSETVL(rows - j); | |||
| VSEV_FLOAT(bptr + j, va, vl); | |||
| } | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| if ( alpha == 1.0 ) | |||
| { | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| for(j=0; j<rows; j+=vl) | |||
| { | |||
| vl = VSETVL(rows - j); | |||
| va = VLEV_FLOAT(aptr + j, vl); | |||
| VSEV_FLOAT(bptr + j, va, vl); | |||
| } | |||
| aptr += lda; | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| i = 0; | |||
| if( cols % 2 ){ | |||
| for(j=0; j<rows; j+=vl) | |||
| { | |||
| vl = VSETVL(rows - j); | |||
| va = VLEV_FLOAT(aptr + j, vl); | |||
| va = VFMULVF_FLOAT(va, alpha, vl); | |||
| VSEV_FLOAT(bptr + j, va, vl); | |||
| } | |||
| aptr += lda; | |||
| bptr += ldb; | |||
| i = 1; | |||
| } | |||
| for ( ; i<cols ; i+=2 ) | |||
| { | |||
| for(j=0; j<rows; j+=vl) | |||
| { | |||
| vl = VSETVL(rows - j); | |||
| va = VLEV_FLOAT(aptr + j, vl); | |||
| va1= VLEV_FLOAT(aptr + lda + j, vl); | |||
| va = VFMULVF_FLOAT(va, alpha, vl); | |||
| va1= VFMULVF_FLOAT(va1, alpha, vl); | |||
| VSEV_FLOAT(bptr + j, va, vl); | |||
| VSEV_FLOAT(bptr + ldb + j, va1, vl); | |||
| } | |||
| aptr += 2 * lda; | |||
| bptr += 2 * ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,106 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
/*
 * Precision-dependent aliases for the RVV intrinsics used by this kernel.
 * All of them operate on m4 register groups; the element width is 32 bits
 * unless DOUBLE is defined, in which case it is 64 bits. The *x2 names are
 * the two-field tuple type and its segment load/store and get/set helpers.
 */
#if !defined(DOUBLE)
#define VSETVL(n)        RISCV_RVV(vsetvl_e32m4)(n)
#define FLOAT_V_T        vfloat32m4_t
#define FLOAT_VX2_T      vfloat32m4x2_t
/* unit-stride and strided loads/stores */
#define VLEV_FLOAT       RISCV_RVV(vle32_v_f32m4)
#define VLSEV_FLOAT      RISCV_RVV(vlse32_v_f32m4)
#define VSEV_FLOAT       RISCV_RVV(vse32_v_f32m4)
#define VSSEV_FLOAT      RISCV_RVV(vsse32_v_f32m4)
/* two-field segment loads/stores and tuple access */
#define VLSEG2_FLOAT     RISCV_RVV(vlseg2e32_v_f32m4x2)
#define VSSEG2_FLOAT     RISCV_RVV(vsseg2e32_v_f32m4x2)
#define VGET_VX2         RISCV_RVV(vget_v_f32m4x2_f32m4)
#define VSET_VX2         RISCV_RVV(vset_v_f32m4_f32m4x2)
/* vector-scalar arithmetic */
#define VFMUL_VF_FLOAT   RISCV_RVV(vfmul_vf_f32m4)
#define VFMACCVF_FLOAT   RISCV_RVV(vfmacc_vf_f32m4)
#define VFNMSACVF_FLOAT  RISCV_RVV(vfnmsac_vf_f32m4)
#else
#define VSETVL(n)        RISCV_RVV(vsetvl_e64m4)(n)
#define FLOAT_V_T        vfloat64m4_t
#define FLOAT_VX2_T      vfloat64m4x2_t
/* unit-stride and strided loads/stores */
#define VLEV_FLOAT       RISCV_RVV(vle64_v_f64m4)
#define VLSEV_FLOAT      RISCV_RVV(vlse64_v_f64m4)
#define VSEV_FLOAT       RISCV_RVV(vse64_v_f64m4)
#define VSSEV_FLOAT      RISCV_RVV(vsse64_v_f64m4)
/* two-field segment loads/stores and tuple access */
#define VLSEG2_FLOAT     RISCV_RVV(vlseg2e64_v_f64m4x2)
#define VSSEG2_FLOAT     RISCV_RVV(vsseg2e64_v_f64m4x2)
#define VGET_VX2         RISCV_RVV(vget_v_f64m4x2_f64m4)
#define VSET_VX2         RISCV_RVV(vset_v_f64m4_f64m4x2)
/* vector-scalar arithmetic */
#define VFMUL_VF_FLOAT   RISCV_RVV(vfmul_vf_f64m4)
#define VFMACCVF_FLOAT   RISCV_RVV(vfmacc_vf_f64m4)
#define VFNMSACVF_FLOAT  RISCV_RVV(vfnmsac_vf_f64m4)
#endif
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
| BLASLONG i,j,ia; | |||
| FLOAT *aptr,*bptr; | |||
| FLOAT_V_T bptr_v0 , bptr_v1 , aptr_v0 ,aptr_v1; | |||
| FLOAT_VX2_T va, vb; | |||
| unsigned int gvl = 0; | |||
| if ( rows <= 0 ) return(0); | |||
| if ( cols <= 0 ) return(0); | |||
| aptr = a; | |||
| bptr = b; | |||
| lda *= 2; | |||
| ldb *= 2; | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| ia = 0; | |||
| for(j=0; j<rows ; j+=gvl) | |||
| { | |||
| gvl = VSETVL(rows - j); | |||
| va = VLSEG2_FLOAT(aptr + ia, gvl); | |||
| aptr_v0 = VGET_VX2(va, 0); | |||
| aptr_v1 = VGET_VX2(va, 1); | |||
| bptr_v1 = VFMUL_VF_FLOAT( aptr_v1, alpha_r,gvl); | |||
| bptr_v1 = VFMACCVF_FLOAT(bptr_v1, alpha_i, aptr_v0, gvl); | |||
| bptr_v0 = VFMUL_VF_FLOAT( aptr_v0,alpha_r, gvl); | |||
| bptr_v0 = VFNMSACVF_FLOAT(bptr_v0, alpha_i, aptr_v1, gvl); | |||
| vb = VSET_VX2(vb, 0, bptr_v0); | |||
| vb = VSET_VX2(vb, 1, bptr_v1); | |||
| VSSEG2_FLOAT(&bptr[ia], vb, gvl); | |||
| ia += gvl * 2 ; | |||
| } | |||
| aptr += lda; | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||