kernel/riscv64: Added support for omatcopy on RISCV64_ZVL256B (tags/v0.3.30)
| @@ -83,9 +83,39 @@ jobs: | |||||
| - name: test | - name: test | ||||
| run: | | run: | | ||||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH | |||||
| qemu-riscv64 ./utest/openblas_utest | |||||
| qemu-riscv64 ./utest/openblas_utest_ext | |||||
# run_with_retry CMD
#
# Runs CMD (a single string, word-split into program + arguments) under
# `timeout`, which sends signal 12 once the time limit expires.
#   * exit status 0   -> report success and return 0.
#   * exit status 140 -> treated as a timeout: the limit is raised by 5 seconds
#                        and the command is retried, up to 10 attempts in total.
#   * anything else   -> the whole script exits with that status.
# If every attempt times out, the script exits with the last status (140).
run_with_retry() {
  local cmd="$1"
  local time_out=10
  local retries=10
  local exit_code
  for ((i = 1; i <= retries; i++)); do
    exit_code=0
    # $cmd is left unquoted on purpose so that it splits into separate words.
    timeout -s 12 --preserve-status $time_out $cmd || exit_code=$?
    case $exit_code in
      0)
        echo "Command succeeded on attempt $i."
        return 0
        ;;
      140)
        echo "Attempt $i timed out (retrying...)"
        time_out=$((time_out + 5))
        ;;
      *)
        echo "Attempt $i failed with exit code $exit_code. Aborting workflow."
        exit $exit_code
        ;;
    esac
  done
  echo "All $retries attempts failed, giving up."
  echo "Final failure was due to timeout."
  echo "Aborting workflow."
  exit $exit_code
}
# Put the QEMU install directory first on PATH, print which qemu-riscv64 was
# resolved, and remember its path in QEMU_BIN.
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
which qemu-riscv64
export QEMU_BIN=$(which qemu-riscv64)

# Run both unit-test binaries through run_with_retry, in the original order.
for utest_bin in ./utest/openblas_utest ./utest/openblas_utest_ext; do
  run_with_retry "$QEMU_BIN $utest_bin"
done
| OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1 | OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1 | ||||
| OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1 | OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1 | ||||
| OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1 | OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1 | ||||
| @@ -201,3 +201,9 @@ endif | |||||
| ifndef ZGEMM_BETA | ifndef ZGEMM_BETA | ||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
| endif | endif | ||||
# omatcopy "CN" kernels: the S/D variants are built from omatcopy_cn_vector.c,
# the C/Z variants from zomatcopy_cn_vector.c.
SOMATCOPY_CN = omatcopy_cn_vector.c
DOMATCOPY_CN = omatcopy_cn_vector.c
COMATCOPY_CN = zomatcopy_cn_vector.c
ZOMATCOPY_CN = zomatcopy_cn_vector.c
| @@ -0,0 +1,123 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2013, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include "common.h" | |||||
/*
 * Precision-neutral aliases for the RVV intrinsics used by this kernel.
 * DOUBLE selects the e64/m4 variants; otherwise the e32/m4 variants are used.
 */
#if defined(DOUBLE)
#define VSETVL_MAX      RISCV_RVV(vsetvlmax_e64m4)()
#define VSETVL(n)       RISCV_RVV(vsetvl_e64m4)(n)
#define FLOAT_V_T       vfloat64m4_t
#define VLEV_FLOAT      RISCV_RVV(vle64_v_f64m4)
#define VSEV_FLOAT      RISCV_RVV(vse64_v_f64m4)
#define VFMULVF_FLOAT   RISCV_RVV(vfmul_vf_f64m4)
#define VFMVVF_FLOAT    RISCV_RVV(vfmv_v_f_f64m4)
#else
#define VSETVL_MAX      RISCV_RVV(vsetvlmax_e32m4)()
#define VSETVL(n)       RISCV_RVV(vsetvl_e32m4)(n)
#define FLOAT_V_T       vfloat32m4_t
#define VLEV_FLOAT      RISCV_RVV(vle32_v_f32m4)
#define VSEV_FLOAT      RISCV_RVV(vse32_v_f32m4)
#define VFMULVF_FLOAT   RISCV_RVV(vfmul_vf_f32m4)
#define VFMVVF_FLOAT    RISCV_RVV(vfmv_v_f_f32m4)
#endif
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||||
| { | |||||
| BLASLONG i,j; | |||||
| FLOAT *aptr,*bptr; | |||||
| size_t vl; | |||||
| FLOAT_V_T va, vb,va1,vb1; | |||||
| if ( rows <= 0 ) return(0); | |||||
| if ( cols <= 0 ) return(0); | |||||
| aptr = a; | |||||
| bptr = b; | |||||
| if ( alpha == 0.0 ) | |||||
| { | |||||
| vl = VSETVL_MAX; | |||||
| va = VFMVVF_FLOAT(0, vl); | |||||
| for ( i=0; i<cols ; i++ ) | |||||
| { | |||||
| for(j=0; j<rows; j+=vl) | |||||
| { | |||||
| vl = VSETVL(rows - j); | |||||
| VSEV_FLOAT(bptr + j, va, vl); | |||||
| } | |||||
| bptr += ldb; | |||||
| } | |||||
| return(0); | |||||
| } | |||||
| if ( alpha == 1.0 ) | |||||
| { | |||||
| for ( i=0; i<cols ; i++ ) | |||||
| { | |||||
| for(j=0; j<rows; j+=vl) | |||||
| { | |||||
| vl = VSETVL(rows - j); | |||||
| va = VLEV_FLOAT(aptr + j, vl); | |||||
| VSEV_FLOAT(bptr + j, va, vl); | |||||
| } | |||||
| aptr += lda; | |||||
| bptr += ldb; | |||||
| } | |||||
| return(0); | |||||
| } | |||||
| i = 0; | |||||
| if( cols % 2 ){ | |||||
| for(j=0; j<rows; j+=vl) | |||||
| { | |||||
| vl = VSETVL(rows - j); | |||||
| va = VLEV_FLOAT(aptr + j, vl); | |||||
| va = VFMULVF_FLOAT(va, alpha, vl); | |||||
| VSEV_FLOAT(bptr + j, va, vl); | |||||
| } | |||||
| aptr += lda; | |||||
| bptr += ldb; | |||||
| i = 1; | |||||
| } | |||||
| for ( ; i<cols ; i+=2 ) | |||||
| { | |||||
| for(j=0; j<rows; j+=vl) | |||||
| { | |||||
| vl = VSETVL(rows - j); | |||||
| va = VLEV_FLOAT(aptr + j, vl); | |||||
| va1= VLEV_FLOAT(aptr + lda + j, vl); | |||||
| va = VFMULVF_FLOAT(va, alpha, vl); | |||||
| va1= VFMULVF_FLOAT(va1, alpha, vl); | |||||
| VSEV_FLOAT(bptr + j, va, vl); | |||||
| VSEV_FLOAT(bptr + ldb + j, va1, vl); | |||||
| } | |||||
| aptr += 2 * lda; | |||||
| bptr += 2 * ldb; | |||||
| } | |||||
| return(0); | |||||
| } | |||||
| @@ -0,0 +1,106 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2013, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include "common.h" | |||||
/*
 * Precision-neutral aliases for the RVV intrinsics.
 * Without DOUBLE the e32/m4 variants are selected, with DOUBLE the e64/m4 ones.
 * Each name is defined exactly once per branch.
 */
#if !defined(DOUBLE)
#define VSETVL(n)        RISCV_RVV(vsetvl_e32m4)(n)
#define FLOAT_V_T        vfloat32m4_t
#define FLOAT_VX2_T      vfloat32m4x2_t
/* Two-field segment load/store and tuple field access. */
#define VLSEG2_FLOAT     RISCV_RVV(vlseg2e32_v_f32m4x2)
#define VSSEG2_FLOAT     RISCV_RVV(vsseg2e32_v_f32m4x2)
#define VGET_VX2         RISCV_RVV(vget_v_f32m4x2_f32m4)
#define VSET_VX2         RISCV_RVV(vset_v_f32m4_f32m4x2)
/* Vector-scalar arithmetic. */
#define VFMUL_VF_FLOAT   RISCV_RVV(vfmul_vf_f32m4)
#define VFMACCVF_FLOAT   RISCV_RVV(vfmacc_vf_f32m4)
#define VFNMSACVF_FLOAT  RISCV_RVV(vfnmsac_vf_f32m4)
/* Unit-stride and strided load/store; not referenced by the kernel in this file. */
#define VLEV_FLOAT       RISCV_RVV(vle32_v_f32m4)
#define VLSEV_FLOAT      RISCV_RVV(vlse32_v_f32m4)
#define VSEV_FLOAT       RISCV_RVV(vse32_v_f32m4)
#define VSSEV_FLOAT      RISCV_RVV(vsse32_v_f32m4)
#else
#define VSETVL(n)        RISCV_RVV(vsetvl_e64m4)(n)
#define FLOAT_V_T        vfloat64m4_t
#define FLOAT_VX2_T      vfloat64m4x2_t
/* Two-field segment load/store and tuple field access. */
#define VLSEG2_FLOAT     RISCV_RVV(vlseg2e64_v_f64m4x2)
#define VSSEG2_FLOAT     RISCV_RVV(vsseg2e64_v_f64m4x2)
#define VGET_VX2         RISCV_RVV(vget_v_f64m4x2_f64m4)
#define VSET_VX2         RISCV_RVV(vset_v_f64m4_f64m4x2)
/* Vector-scalar arithmetic. */
#define VFMUL_VF_FLOAT   RISCV_RVV(vfmul_vf_f64m4)
#define VFMACCVF_FLOAT   RISCV_RVV(vfmacc_vf_f64m4)
#define VFNMSACVF_FLOAT  RISCV_RVV(vfnmsac_vf_f64m4)
/* Unit-stride and strided load/store; not referenced by the kernel in this file. */
#define VLEV_FLOAT       RISCV_RVV(vle64_v_f64m4)
#define VLSEV_FLOAT      RISCV_RVV(vlse64_v_f64m4)
#define VSEV_FLOAT       RISCV_RVV(vse64_v_f64m4)
#define VSSEV_FLOAT      RISCV_RVV(vsse64_v_f64m4)
#endif
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||||
| { | |||||
| BLASLONG i,j,ia; | |||||
| FLOAT *aptr,*bptr; | |||||
| FLOAT_V_T bptr_v0 , bptr_v1 , aptr_v0 ,aptr_v1; | |||||
| FLOAT_VX2_T va, vb; | |||||
| unsigned int gvl = 0; | |||||
| if ( rows <= 0 ) return(0); | |||||
| if ( cols <= 0 ) return(0); | |||||
| aptr = a; | |||||
| bptr = b; | |||||
| lda *= 2; | |||||
| ldb *= 2; | |||||
| for ( i=0; i<cols ; i++ ) | |||||
| { | |||||
| ia = 0; | |||||
| for(j=0; j<rows ; j+=gvl) | |||||
| { | |||||
| gvl = VSETVL(rows - j); | |||||
| va = VLSEG2_FLOAT(aptr + ia, gvl); | |||||
| aptr_v0 = VGET_VX2(va, 0); | |||||
| aptr_v1 = VGET_VX2(va, 1); | |||||
| bptr_v1 = VFMUL_VF_FLOAT( aptr_v1, alpha_r,gvl); | |||||
| bptr_v1 = VFMACCVF_FLOAT(bptr_v1, alpha_i, aptr_v0, gvl); | |||||
| bptr_v0 = VFMUL_VF_FLOAT( aptr_v0,alpha_r, gvl); | |||||
| bptr_v0 = VFNMSACVF_FLOAT(bptr_v0, alpha_i, aptr_v1, gvl); | |||||
| vb = VSET_VX2(vb, 0, bptr_v0); | |||||
| vb = VSET_VX2(vb, 1, bptr_v1); | |||||
| VSSEG2_FLOAT(&bptr[ia], vb, gvl); | |||||
| ia += gvl * 2 ; | |||||
| } | |||||
| aptr += lda; | |||||
| bptr += ldb; | |||||
| } | |||||
| return(0); | |||||
| } | |||||