Replace __riscv_vid_v_i32m2 and __riscv_vid_v_i64m2 with __riscv_vid_v_u32m2 and __riscv_vid_v_u64m2 to fix compilation with riscv64-unknown-linux-gnu-gcc. (tags/v0.3.29)
| @@ -35,11 +35,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VSEV_FLOAT __riscv_vse32_v_f32m2 | #define VSEV_FLOAT __riscv_vse32_v_f32m2 | ||||
| #define VLSEV_FLOAT __riscv_vlse32_v_f32m2 | #define VLSEV_FLOAT __riscv_vlse32_v_f32m2 | ||||
| #define INT_V_T vint32m2_t | #define INT_V_T vint32m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i32m2 | |||||
| #define VID_V_INT __riscv_vid_v_u32m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i32m2 | #define VADD_VX_INT __riscv_vadd_vx_i32m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | ||||
| #define VBOOL_T vbool16_t | #define VBOOL_T vbool16_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u32m2_i32m2 | |||||
| #else | #else | ||||
| #define VSETVL(n) __riscv_vsetvl_e64m2(n) | #define VSETVL(n) __riscv_vsetvl_e64m2(n) | ||||
| #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | ||||
| @@ -48,11 +49,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VSEV_FLOAT __riscv_vse64_v_f64m2 | #define VSEV_FLOAT __riscv_vse64_v_f64m2 | ||||
| #define VLSEV_FLOAT __riscv_vlse64_v_f64m2 | #define VLSEV_FLOAT __riscv_vlse64_v_f64m2 | ||||
| #define INT_V_T vint64m2_t | #define INT_V_T vint64m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i64m2 | |||||
| #define VID_V_INT __riscv_vid_v_u64m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i64m2 | #define VADD_VX_INT __riscv_vadd_vx_i64m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | ||||
| #define VBOOL_T vbool32_t | #define VBOOL_T vbool32_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u64m2_i64m2 | |||||
| #endif | #endif | ||||
| // Optimizes the implementation in ../generic/symm_lcopy_4.c | // Optimizes the implementation in ../generic/symm_lcopy_4.c | ||||
| @@ -70,7 +72,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| INT_V_T vindex_max, vindex; | INT_V_T vindex_max, vindex; | ||||
| size_t vl = VSETVL_MAX; | size_t vl = VSETVL_MAX; | ||||
| vindex_max = VID_V_INT(vl); | |||||
| vindex_max = V_UM2_TO_IM2(VID_V_INT(vl)); | |||||
| for (js = n; js > 0; js -= vl, posX += vl) { | for (js = n; js > 0; js -= vl, posX += vl) { | ||||
| vl = VSETVL(js); | vl = VSETVL(js); | ||||
| @@ -98,4 +100,3 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -35,11 +35,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VSEV_FLOAT __riscv_vse32_v_f32m2 | #define VSEV_FLOAT __riscv_vse32_v_f32m2 | ||||
| #define VLSEV_FLOAT __riscv_vlse32_v_f32m2 | #define VLSEV_FLOAT __riscv_vlse32_v_f32m2 | ||||
| #define INT_V_T vint32m2_t | #define INT_V_T vint32m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i32m2 | |||||
| #define VID_V_INT __riscv_vid_v_u32m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i32m2 | #define VADD_VX_INT __riscv_vadd_vx_i32m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | ||||
| #define VBOOL_T vbool16_t | #define VBOOL_T vbool16_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u32m2_i32m2 | |||||
| #else | #else | ||||
| #define VSETVL(n) __riscv_vsetvl_e64m2(n) | #define VSETVL(n) __riscv_vsetvl_e64m2(n) | ||||
| #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | ||||
| @@ -48,11 +49,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VSEV_FLOAT __riscv_vse64_v_f64m2 | #define VSEV_FLOAT __riscv_vse64_v_f64m2 | ||||
| #define VLSEV_FLOAT __riscv_vlse64_v_f64m2 | #define VLSEV_FLOAT __riscv_vlse64_v_f64m2 | ||||
| #define INT_V_T vint64m2_t | #define INT_V_T vint64m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i64m2 | |||||
| #define VID_V_INT __riscv_vid_v_u64m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i64m2 | #define VADD_VX_INT __riscv_vadd_vx_i64m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | ||||
| #define VBOOL_T vbool32_t | #define VBOOL_T vbool32_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u64m2_i64m2 | |||||
| #endif | #endif | ||||
| // Optimizes the implementation in ../generic/symm_ucopy_4.c | // Optimizes the implementation in ../generic/symm_ucopy_4.c | ||||
| @@ -70,7 +72,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| INT_V_T vindex_max, vindex; | INT_V_T vindex_max, vindex; | ||||
| size_t vl = VSETVL_MAX; | size_t vl = VSETVL_MAX; | ||||
| vindex_max = VID_V_INT(vl); | |||||
| vindex_max = V_UM2_TO_IM2(VID_V_INT(vl)); | |||||
| for (js = n; js > 0; js -= vl, posX += vl) { | for (js = n; js > 0; js -= vl, posX += vl) { | ||||
| vl = VSETVL(js); | vl = VSETVL(js); | ||||
| @@ -97,4 +99,4 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | |||||
| } | |||||
| @@ -41,7 +41,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VLSSEG2_FLOAT __riscv_vlsseg2e32_v_f32m2x2 | #define VLSSEG2_FLOAT __riscv_vlsseg2e32_v_f32m2x2 | ||||
| #define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m2x2 | #define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m2x2 | ||||
| #define INT_V_T vint32m2_t | #define INT_V_T vint32m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i32m2 | |||||
| #define VID_V_INT __riscv_vid_v_u32m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i32m2 | #define VADD_VX_INT __riscv_vadd_vx_i32m2 | ||||
| #define VFRSUB_VF_FLOAT __riscv_vfrsub_vf_f32m2 | #define VFRSUB_VF_FLOAT __riscv_vfrsub_vf_f32m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | ||||
| @@ -50,6 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VBOOL_T vbool16_t | #define VBOOL_T vbool16_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | ||||
| #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m2 | #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u32m2_i32m2 | |||||
| #else | #else | ||||
| #define VSETVL(n) __riscv_vsetvl_e64m2(n) | #define VSETVL(n) __riscv_vsetvl_e64m2(n) | ||||
| #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | ||||
| @@ -64,7 +65,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VLSSEG2_FLOAT __riscv_vlsseg2e64_v_f64m2x2 | #define VLSSEG2_FLOAT __riscv_vlsseg2e64_v_f64m2x2 | ||||
| #define VSSEG2_FLOAT __riscv_vsseg2e64_v_f64m2x2 | #define VSSEG2_FLOAT __riscv_vsseg2e64_v_f64m2x2 | ||||
| #define INT_V_T vint64m2_t | #define INT_V_T vint64m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i64m2 | |||||
| #define VID_V_INT __riscv_vid_v_u64m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i64m2 | #define VADD_VX_INT __riscv_vadd_vx_i64m2 | ||||
| #define VFRSUB_VF_FLOAT __riscv_vfrsub_vf_f64m2 | #define VFRSUB_VF_FLOAT __riscv_vfrsub_vf_f64m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | ||||
| @@ -73,6 +74,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VBOOL_T vbool32_t | #define VBOOL_T vbool32_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | ||||
| #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m2 | #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u64m2_i64m2 | |||||
| #endif | #endif | ||||
| @@ -92,7 +94,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| INT_V_T vindex_max, vindex; | INT_V_T vindex_max, vindex; | ||||
| size_t vl = VSETVL_MAX; | size_t vl = VSETVL_MAX; | ||||
| vindex_max = VID_V_INT(vl); | |||||
| vindex_max = V_UM2_TO_IM2(VID_V_INT(vl)); | |||||
| vzero = VFMVVF_FLOAT(ZERO, vl); | vzero = VFMVVF_FLOAT(ZERO, vl); | ||||
| for (js = n; js > 0; js -= vl, posX += vl) { | for (js = n; js > 0; js -= vl, posX += vl) { | ||||
| @@ -136,4 +138,3 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -41,7 +41,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VLSSEG2_FLOAT __riscv_vlsseg2e32_v_f32m2x2 | #define VLSSEG2_FLOAT __riscv_vlsseg2e32_v_f32m2x2 | ||||
| #define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m2x2 | #define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m2x2 | ||||
| #define INT_V_T vint32m2_t | #define INT_V_T vint32m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i32m2 | |||||
| #define VID_V_INT __riscv_vid_v_u32m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i32m2 | #define VADD_VX_INT __riscv_vadd_vx_i32m2 | ||||
| #define VFRSUB_VF_FLOAT __riscv_vfrsub_vf_f32m2 | #define VFRSUB_VF_FLOAT __riscv_vfrsub_vf_f32m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | ||||
| @@ -50,6 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VBOOL_T vbool16_t | #define VBOOL_T vbool16_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | ||||
| #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m2 | #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u32m2_i32m2 | |||||
| #else | #else | ||||
| #define VSETVL(n) __riscv_vsetvl_e64m2(n) | #define VSETVL(n) __riscv_vsetvl_e64m2(n) | ||||
| #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | ||||
| @@ -64,7 +65,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VLSSEG2_FLOAT __riscv_vlsseg2e64_v_f64m2x2 | #define VLSSEG2_FLOAT __riscv_vlsseg2e64_v_f64m2x2 | ||||
| #define VSSEG2_FLOAT __riscv_vsseg2e64_v_f64m2x2 | #define VSSEG2_FLOAT __riscv_vsseg2e64_v_f64m2x2 | ||||
| #define INT_V_T vint64m2_t | #define INT_V_T vint64m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i64m2 | |||||
| #define VID_V_INT __riscv_vid_v_u64m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i64m2 | #define VADD_VX_INT __riscv_vadd_vx_i64m2 | ||||
| #define VFRSUB_VF_FLOAT __riscv_vfrsub_vf_f64m2 | #define VFRSUB_VF_FLOAT __riscv_vfrsub_vf_f64m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | ||||
| @@ -73,6 +74,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VBOOL_T vbool32_t | #define VBOOL_T vbool32_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | ||||
| #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m2 | #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u64m2_i64m2 | |||||
| #endif | #endif | ||||
| @@ -90,7 +92,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| INT_V_T vindex_max, vindex; | INT_V_T vindex_max, vindex; | ||||
| size_t vl = VSETVL_MAX; | size_t vl = VSETVL_MAX; | ||||
| vindex_max = VID_V_INT(vl); | |||||
| vindex_max = V_UM2_TO_IM2(VID_V_INT(vl)); | |||||
| vzero = VFMVVF_FLOAT(ZERO, vl); | vzero = VFMVVF_FLOAT(ZERO, vl); | ||||
| for (js = n; js > 0; js -= vl, posX += vl) { | for (js = n; js > 0; js -= vl, posX += vl) { | ||||
| @@ -132,4 +134,4 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | |||||
| } | |||||
| @@ -41,11 +41,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VLSSEG2_FLOAT __riscv_vlsseg2e32_v_f32m2x2 | #define VLSSEG2_FLOAT __riscv_vlsseg2e32_v_f32m2x2 | ||||
| #define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m2x2 | #define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m2x2 | ||||
| #define INT_V_T vint32m2_t | #define INT_V_T vint32m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i32m2 | |||||
| #define VID_V_INT __riscv_vid_v_u32m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i32m2 | #define VADD_VX_INT __riscv_vadd_vx_i32m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | ||||
| #define VBOOL_T vbool16_t | #define VBOOL_T vbool16_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u32m2_i32m2 | |||||
| #else | #else | ||||
| #define VSETVL(n) __riscv_vsetvl_e64m2(n) | #define VSETVL(n) __riscv_vsetvl_e64m2(n) | ||||
| #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | ||||
| @@ -60,11 +61,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VLSSEG2_FLOAT __riscv_vlsseg2e64_v_f64m2x2 | #define VLSSEG2_FLOAT __riscv_vlsseg2e64_v_f64m2x2 | ||||
| #define VSSEG2_FLOAT __riscv_vsseg2e64_v_f64m2x2 | #define VSSEG2_FLOAT __riscv_vsseg2e64_v_f64m2x2 | ||||
| #define INT_V_T vint64m2_t | #define INT_V_T vint64m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i64m2 | |||||
| #define VID_V_INT __riscv_vid_v_u64m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i64m2 | #define VADD_VX_INT __riscv_vadd_vx_i64m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | ||||
| #define VBOOL_T vbool32_t | #define VBOOL_T vbool32_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u64m2_i64m2 | |||||
| #endif | #endif | ||||
| int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, FLOAT *b) | int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, FLOAT *b) | ||||
| @@ -81,7 +83,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| INT_V_T vindex_max, vindex; | INT_V_T vindex_max, vindex; | ||||
| size_t vl = VSETVL_MAX; | size_t vl = VSETVL_MAX; | ||||
| vindex_max = VID_V_INT(vl); | |||||
| vindex_max = V_UM2_TO_IM2(VID_V_INT(vl)); | |||||
| for (js = n; js > 0; js -= vl, posX += vl) { | for (js = n; js > 0; js -= vl, posX += vl) { | ||||
| vl = VSETVL(js); | vl = VSETVL(js); | ||||
| @@ -118,4 +120,3 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -41,11 +41,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VLSSEG2_FLOAT __riscv_vlsseg2e32_v_f32m2x2 | #define VLSSEG2_FLOAT __riscv_vlsseg2e32_v_f32m2x2 | ||||
| #define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m2x2 | #define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m2x2 | ||||
| #define INT_V_T vint32m2_t | #define INT_V_T vint32m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i32m2 | |||||
| #define VID_V_INT __riscv_vid_v_u32m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i32m2 | #define VADD_VX_INT __riscv_vadd_vx_i32m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i32m2_b16 | ||||
| #define VBOOL_T vbool16_t | #define VBOOL_T vbool16_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f32m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u32m2_i32m2 | |||||
| #else | #else | ||||
| #define VSETVL(n) __riscv_vsetvl_e64m2(n) | #define VSETVL(n) __riscv_vsetvl_e64m2(n) | ||||
| #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | #define VSETVL_MAX __riscv_vsetvlmax_e64m2() | ||||
| @@ -60,11 +61,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VLSSEG2_FLOAT __riscv_vlsseg2e64_v_f64m2x2 | #define VLSSEG2_FLOAT __riscv_vlsseg2e64_v_f64m2x2 | ||||
| #define VSSEG2_FLOAT __riscv_vsseg2e64_v_f64m2x2 | #define VSSEG2_FLOAT __riscv_vsseg2e64_v_f64m2x2 | ||||
| #define INT_V_T vint64m2_t | #define INT_V_T vint64m2_t | ||||
| #define VID_V_INT __riscv_vid_v_i64m2 | |||||
| #define VID_V_INT __riscv_vid_v_u64m2 | |||||
| #define VADD_VX_INT __riscv_vadd_vx_i64m2 | #define VADD_VX_INT __riscv_vadd_vx_i64m2 | ||||
| #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | #define VMSGT_VX_INT __riscv_vmsgt_vx_i64m2_b32 | ||||
| #define VBOOL_T vbool32_t | #define VBOOL_T vbool32_t | ||||
| #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | #define VMERGE_VVM_FLOAT __riscv_vmerge_vvm_f64m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u64m2_i64m2 | |||||
| #endif | #endif | ||||
| @@ -83,7 +85,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| size_t vl = VSETVL_MAX; | size_t vl = VSETVL_MAX; | ||||
| vindex_max = VID_V_INT(vl); | |||||
| vindex_max = V_UM2_TO_IM2(VID_V_INT(vl)); | |||||
| for (js = n; js > 0; js -= vl, posX += vl) { | for (js = n; js > 0; js -= vl, posX += vl) { | ||||
| vl = VSETVL(js); | vl = VSETVL(js); | ||||
| @@ -118,4 +120,4 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | |||||
| } | |||||
| @@ -42,10 +42,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m2x2 | #define VSSEG2_FLOAT __riscv_vsseg2e32_v_f32m2x2 | ||||
| #define VBOOL_T vbool16_t | #define VBOOL_T vbool16_t | ||||
| #define UINT_V_T vint32m2_t | #define UINT_V_T vint32m2_t | ||||
| #define VID_V_UINT __riscv_vid_v_i32m2 | |||||
| #define VID_V_UINT __riscv_vid_v_u32m2 | |||||
| #define VMSGTU_VX_UINT __riscv_vmsgt_vx_i32m2_b16 | #define VMSGTU_VX_UINT __riscv_vmsgt_vx_i32m2_b16 | ||||
| #define VMSEQ_VX_UINT __riscv_vmseq_vx_i32m2_b16 | #define VMSEQ_VX_UINT __riscv_vmseq_vx_i32m2_b16 | ||||
| #define VFMERGE_VFM_FLOAT __riscv_vfmerge_vfm_f32m2 | #define VFMERGE_VFM_FLOAT __riscv_vfmerge_vfm_f32m2 | ||||
| #define V_UM2_TO_IM2 __riscv_vreinterpret_v_u32m2_i32m2 | |||||
| #else | #else | ||||
| #define VSETVL(n) __riscv_vsetvl_e64m2(n) | #define VSETVL(n) __riscv_vsetvl_e64m2(n) | ||||
| #define FLOAT_V_T vfloat64m2_t | #define FLOAT_V_T vfloat64m2_t | ||||
| @@ -63,6 +64,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define VMSGTU_VX_UINT __riscv_vmsgtu_vx_u64m2_b32 | #define VMSGTU_VX_UINT __riscv_vmsgtu_vx_u64m2_b32 | ||||
| #define VMSEQ_VX_UINT __riscv_vmseq_vx_u64m2_b32 | #define VMSEQ_VX_UINT __riscv_vmseq_vx_u64m2_b32 | ||||
| #define VFMERGE_VFM_FLOAT __riscv_vfmerge_vfm_f64m2 | #define VFMERGE_VFM_FLOAT __riscv_vfmerge_vfm_f64m2 | ||||
| #define V_UM2_TO_IM2(values) values | |||||
| #endif | #endif | ||||
| int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, FLOAT *b){ | int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, FLOAT *b){ | ||||
| @@ -99,7 +101,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| } | } | ||||
| i = 0; | i = 0; | ||||
| do | |||||
| do | |||||
| { | { | ||||
| if (X > posY) | if (X > posY) | ||||
| { | { | ||||
| @@ -119,9 +121,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| X ++; | X ++; | ||||
| i ++; | i ++; | ||||
| } | } | ||||
| else | |||||
| else | |||||
| { | { | ||||
| vindex = VID_V_UINT(vl); | |||||
| vindex = V_UM2_TO_IM2(VID_V_UINT(vl)); | |||||
| for (unsigned int j = 0; j < vl; j++) | for (unsigned int j = 0; j < vl; j++) | ||||
| { | { | ||||
| vax2 = VLSSEG2_FLOAT(ao, stride_lda, vl); | vax2 = VLSSEG2_FLOAT(ao, stride_lda, vl); | ||||
| @@ -152,4 +154,4 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | |||||
| } | |||||