Browse Source

Merge pull request #1775 from martin-frbg/issue1774

Convert fldmia/fstmia instructions to UAL syntax (vldmia/vstmia) for clang 7
tags/v0.3.4
Martin Kroeker GitHub 7 years ago
parent
commit
831c661386
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
40 changed files with 1105 additions and 1105 deletions
  1. +38
    -38
      kernel/arm/asum_vfp.S
  2. +62
    -62
      kernel/arm/axpy_vfp.S
  3. +14
    -14
      kernel/arm/ccopy_vfp.S
  4. +20
    -20
      kernel/arm/cdot_vfp.S
  5. +22
    -22
      kernel/arm/cgemm_kernel_2x2_vfp.S
  6. +32
    -32
      kernel/arm/cgemm_kernel_2x2_vfpv3.S
  7. +10
    -10
      kernel/arm/cgemm_tcopy_2_vfp.S
  8. +16
    -16
      kernel/arm/cgemv_n_vfp.S
  9. +20
    -20
      kernel/arm/cgemv_t_vfp.S
  10. +16
    -16
      kernel/arm/ctrmm_kernel_2x2_vfp.S
  11. +26
    -26
      kernel/arm/ctrmm_kernel_2x2_vfpv3.S
  12. +14
    -14
      kernel/arm/dcopy_vfp.S
  13. +20
    -20
      kernel/arm/ddot_vfp.S
  14. +4
    -4
      kernel/arm/dgemm_kernel_4x4_vfpv3.S
  15. +30
    -30
      kernel/arm/dgemm_tcopy_4_vfp.S
  16. +13
    -13
      kernel/arm/dtrmm_kernel_4x4_vfpv3.S
  17. +50
    -50
      kernel/arm/gemv_n_vfp.S
  18. +60
    -60
      kernel/arm/gemv_n_vfpv3.S
  19. +84
    -84
      kernel/arm/gemv_t_vfp.S
  20. +84
    -84
      kernel/arm/gemv_t_vfpv3.S
  21. +16
    -16
      kernel/arm/iamax_vfp.S
  22. +8
    -8
      kernel/arm/nrm2_vfp.S
  23. +8
    -8
      kernel/arm/nrm2_vfpv3.S
  24. +112
    -112
      kernel/arm/rot_vfp.S
  25. +38
    -38
      kernel/arm/scal_vfp.S
  26. +16
    -16
      kernel/arm/scopy_vfp.S
  27. +36
    -36
      kernel/arm/sdot_vfp.S
  28. +2
    -2
      kernel/arm/sgemm_kernel_4x2_vfp.S
  29. +20
    -20
      kernel/arm/sgemm_kernel_4x4_vfpv3.S
  30. +35
    -35
      kernel/arm/sgemm_tcopy_4_vfp.S
  31. +2
    -2
      kernel/arm/strmm_kernel_4x2_vfp.S
  32. +17
    -17
      kernel/arm/strmm_kernel_4x4_vfpv3.S
  33. +56
    -56
      kernel/arm/swap_vfp.S
  34. +14
    -14
      kernel/arm/zcopy_vfp.S
  35. +20
    -20
      kernel/arm/zdot_vfp.S
  36. +12
    -12
      kernel/arm/zgemm_kernel_2x2_vfp.S
  37. +12
    -12
      kernel/arm/zgemm_kernel_2x2_vfpv3.S
  38. +10
    -10
      kernel/arm/zgemm_tcopy_2_vfp.S
  39. +16
    -16
      kernel/arm/zgemv_n_vfp.S
  40. +20
    -20
      kernel/arm/zgemv_t_vfp.S

+ 38
- 38
kernel/arm/asum_vfp.S View File

@@ -58,11 +58,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4 .macro KERNEL_F4


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
vabs.f64 d5, d5 vabs.f64 d5, d5
fldmiad X!, { d6 - d7 }
vldmia.f64 X!, { d6 - d7 }
vabs.f64 d6, d6 vabs.f64 d6, d6
vadd.f64 d1 , d1, d5 vadd.f64 d1 , d1, d5
vabs.f64 d7, d7 vabs.f64 d7, d7
@@ -73,7 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4


@@ -82,22 +82,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S4 .macro KERNEL_S4


fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
add X, X, INC_X add X, X, INC_X


fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
add X, X, INC_X add X, X, INC_X


fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
add X, X, INC_X add X, X, INC_X


fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
add X, X, INC_X add X, X, INC_X
@@ -107,7 +107,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
add X, X, INC_X add X, X, INC_X
@@ -118,11 +118,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F4 .macro KERNEL_F4


fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
vabs.f32 s5, s5 vabs.f32 s5, s5
fldmias X!, { s6 - s7 }
vldmia.f32 X!, { s6 - s7 }
vabs.f32 s6, s6 vabs.f32 s6, s6
vadd.f32 s1 , s1, s5 vadd.f32 s1 , s1, s5
vabs.f32 s7, s7 vabs.f32 s7, s7
@@ -133,7 +133,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4


@@ -142,22 +142,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S4 .macro KERNEL_S4


fldmias X, { s4 }
vldmia.f32 X, { s4 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
add X, X, INC_X add X, X, INC_X


fldmias X, { s4 }
vldmia.f32 X, { s4 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
add X, X, INC_X add X, X, INC_X


fldmias X, { s4 }
vldmia.f32 X, { s4 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
add X, X, INC_X add X, X, INC_X


fldmias X, { s4 }
vldmia.f32 X, { s4 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
add X, X, INC_X add X, X, INC_X
@@ -167,7 +167,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 }
vldmia.f32 X, { s4 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
add X, X, INC_X add X, X, INC_X
@@ -184,11 +184,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4 .macro KERNEL_F4


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
vabs.f64 d5, d5 vabs.f64 d5, d5
fldmiad X!, { d6 - d7 }
vldmia.f64 X!, { d6 - d7 }
vabs.f64 d6, d6 vabs.f64 d6, d6
vadd.f64 d1 , d1, d5 vadd.f64 d1 , d1, d5
vabs.f64 d7, d7 vabs.f64 d7, d7
@@ -196,11 +196,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vadd.f64 d1 , d1, d7 vadd.f64 d1 , d1, d7


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
vabs.f64 d5, d5 vabs.f64 d5, d5
fldmiad X!, { d6 - d7 }
vldmia.f64 X!, { d6 - d7 }
vabs.f64 d6, d6 vabs.f64 d6, d6
vadd.f64 d1 , d1, d5 vadd.f64 d1 , d1, d5
vabs.f64 d7, d7 vabs.f64 d7, d7
@@ -212,11 +212,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4


fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4


@@ -226,28 +226,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S4 .macro KERNEL_S4


fldmiad X, { d4 -d5 }
vldmia.f64 X, { d4 -d5 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
vabs.f64 d5, d5 vabs.f64 d5, d5
vadd.f64 d0 , d0, d5 vadd.f64 d0 , d0, d5
add X, X, INC_X add X, X, INC_X


fldmiad X, { d4 -d5 }
vldmia.f64 X, { d4 -d5 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
vabs.f64 d5, d5 vabs.f64 d5, d5
vadd.f64 d0 , d0, d5 vadd.f64 d0 , d0, d5
add X, X, INC_X add X, X, INC_X


fldmiad X, { d4 -d5 }
vldmia.f64 X, { d4 -d5 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
vabs.f64 d5, d5 vabs.f64 d5, d5
vadd.f64 d0 , d0, d5 vadd.f64 d0 , d0, d5
add X, X, INC_X add X, X, INC_X


fldmiad X, { d4 -d5 }
vldmia.f64 X, { d4 -d5 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
vabs.f64 d5, d5 vabs.f64 d5, d5
@@ -259,7 +259,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 -d5 }
vldmia.f64 X, { d4 -d5 }
vabs.f64 d4, d4 vabs.f64 d4, d4
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
vabs.f64 d5, d5 vabs.f64 d5, d5
@@ -273,22 +273,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4 .macro KERNEL_F4


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
vabs.f32 s5, s5 vabs.f32 s5, s5
fldmias X!, { s6 - s7 }
vldmia.f32 X!, { s6 - s7 }
vabs.f32 s6, s6 vabs.f32 s6, s6
vadd.f32 s1 , s1, s5 vadd.f32 s1 , s1, s5
vabs.f32 s7, s7 vabs.f32 s7, s7
vadd.f32 s0 , s0, s6 vadd.f32 s0 , s0, s6
vadd.f32 s1 , s1, s7 vadd.f32 s1 , s1, s7


fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
vabs.f32 s5, s5 vabs.f32 s5, s5
fldmias X!, { s6 - s7 }
vldmia.f32 X!, { s6 - s7 }
vabs.f32 s6, s6 vabs.f32 s6, s6
vadd.f32 s1 , s1, s5 vadd.f32 s1 , s1, s5
vabs.f32 s7, s7 vabs.f32 s7, s7
@@ -300,11 +300,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4


fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4


@@ -313,28 +313,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S4 .macro KERNEL_S4


fldmias X, { s4 -s5 }
vldmia.f32 X, { s4 -s5 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
vabs.f32 s5, s5 vabs.f32 s5, s5
vadd.f32 s0 , s0, s5 vadd.f32 s0 , s0, s5
add X, X, INC_X add X, X, INC_X


fldmias X, { s4 -s5 }
vldmia.f32 X, { s4 -s5 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
vabs.f32 s5, s5 vabs.f32 s5, s5
vadd.f32 s0 , s0, s5 vadd.f32 s0 , s0, s5
add X, X, INC_X add X, X, INC_X


fldmias X, { s4 -s5 }
vldmia.f32 X, { s4 -s5 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
vabs.f32 s5, s5 vabs.f32 s5, s5
vadd.f32 s0 , s0, s5 vadd.f32 s0 , s0, s5
add X, X, INC_X add X, X, INC_X


fldmias X, { s4 -s5 }
vldmia.f32 X, { s4 -s5 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
vabs.f32 s5, s5 vabs.f32 s5, s5
@@ -346,7 +346,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 -s5 }
vldmia.f32 X, { s4 -s5 }
vabs.f32 s4, s4 vabs.f32 s4, s4
vadd.f32 s0 , s0, s4 vadd.f32 s0 , s0, s4
vabs.f32 s5, s5 vabs.f32 s5, s5


+ 62
- 62
kernel/arm/axpy_vfp.S View File

@@ -146,17 +146,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4 .macro KERNEL_F4


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmiad X!, { d4 - d7 }
vldmia.f64 X!, { d4 - d7 }
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]
fldmiad Y , { d8 - d11 }
vldmia.f64 Y , { d8 - d11 }
fmacd d8 , d0, d4 fmacd d8 , d0, d4
fstmiad Y!, { d8 }
vstmia.f64 Y!, { d8 }
fmacd d9 , d0, d5 fmacd d9 , d0, d5
fstmiad Y!, { d9 }
vstmia.f64 Y!, { d9 }
fmacd d10, d0, d6 fmacd d10, d0, d6
fstmiad Y!, { d10 }
vstmia.f64 Y!, { d10 }
fmacd d11, d0, d7 fmacd d11, d0, d7
fstmiad Y!, { d11 }
vstmia.f64 Y!, { d11 }




.endm .endm
@@ -164,19 +164,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 }
fldmiad Y , { d8 }
vldmia.f64 X!, { d4 }
vldmia.f64 Y , { d8 }
fmacd d8 , d0, d4 fmacd d8 , d0, d4
fstmiad Y!, { d8 }
vstmia.f64 Y!, { d8 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X , { d4 }
fldmiad Y , { d8 }
vldmia.f64 X , { d4 }
vldmia.f64 Y , { d8 }
fmacd d8 , d0, d4 fmacd d8 , d0, d4
fstmiad Y , { d8 }
vstmia.f64 Y , { d8 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


@@ -186,16 +186,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F4 .macro KERNEL_F4


fldmias X!, { s4 - s7 }
fldmias Y , { s8 - s11 }
vldmia.f32 X!, { s4 - s7 }
vldmia.f32 Y , { s8 - s11 }
fmacs s8 , s0, s4 fmacs s8 , s0, s4
fstmias Y!, { s8 }
vstmia.f32 Y!, { s8 }
fmacs s9 , s0, s5 fmacs s9 , s0, s5
fstmias Y!, { s9 }
vstmia.f32 Y!, { s9 }
fmacs s10, s0, s6 fmacs s10, s0, s6
fstmias Y!, { s10 }
vstmia.f32 Y!, { s10 }
fmacs s11, s0, s7 fmacs s11, s0, s7
fstmias Y!, { s11 }
vstmia.f32 Y!, { s11 }




.endm .endm
@@ -203,19 +203,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 }
fldmias Y , { s8 }
vldmia.f32 X!, { s4 }
vldmia.f32 Y , { s8 }
fmacs s8 , s0, s4 fmacs s8 , s0, s4
fstmias Y!, { s8 }
vstmia.f32 Y!, { s8 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X , { s4 }
fldmias Y , { s8 }
vldmia.f32 X , { s4 }
vldmia.f32 Y , { s8 }
fmacs s8 , s0, s4 fmacs s8 , s0, s4
fstmias Y , { s8 }
vstmia.f32 Y , { s8 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


@@ -231,42 +231,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4 .macro KERNEL_F4


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmiad X!, { d4 - d7 }
vldmia.f64 X!, { d4 - d7 }
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]
fldmiad Y , { d8 - d11 }
vldmia.f64 Y , { d8 - d11 }


FMAC_R1 d8 , d0, d4 FMAC_R1 d8 , d0, d4
FMAC_R2 d8 , d1, d5 FMAC_R2 d8 , d1, d5
FMAC_I1 d9 , d0, d5 FMAC_I1 d9 , d0, d5
FMAC_I2 d9 , d1, d4 FMAC_I2 d9 , d1, d4
fstmiad Y!, { d8 }
fstmiad Y!, { d9 }
vstmia.f64 Y!, { d8 }
vstmia.f64 Y!, { d9 }


FMAC_R1 d10, d0, d6 FMAC_R1 d10, d0, d6
FMAC_R2 d10, d1, d7 FMAC_R2 d10, d1, d7
FMAC_I1 d11, d0, d7 FMAC_I1 d11, d0, d7
FMAC_I2 d11, d1, d6 FMAC_I2 d11, d1, d6
fstmiad Y!, { d10 }
fstmiad Y!, { d11 }
vstmia.f64 Y!, { d10 }
vstmia.f64 Y!, { d11 }


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmiad X!, { d4 - d7 }
vldmia.f64 X!, { d4 - d7 }
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]
fldmiad Y , { d8 - d11 }
vldmia.f64 Y , { d8 - d11 }


FMAC_R1 d8 , d0, d4 FMAC_R1 d8 , d0, d4
FMAC_R2 d8 , d1, d5 FMAC_R2 d8 , d1, d5
FMAC_I1 d9 , d0, d5 FMAC_I1 d9 , d0, d5
FMAC_I2 d9 , d1, d4 FMAC_I2 d9 , d1, d4
fstmiad Y!, { d8 }
fstmiad Y!, { d9 }
vstmia.f64 Y!, { d8 }
vstmia.f64 Y!, { d9 }


FMAC_R1 d10, d0, d6 FMAC_R1 d10, d0, d6
FMAC_R2 d10, d1, d7 FMAC_R2 d10, d1, d7
FMAC_I1 d11, d0, d7 FMAC_I1 d11, d0, d7
FMAC_I2 d11, d1, d6 FMAC_I2 d11, d1, d6
fstmiad Y!, { d10 }
fstmiad Y!, { d11 }
vstmia.f64 Y!, { d10 }
vstmia.f64 Y!, { d11 }






@@ -277,15 +277,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 - d5 }
fldmiad Y , { d8 - d9 }
vldmia.f64 X!, { d4 - d5 }
vldmia.f64 Y , { d8 - d9 }


FMAC_R1 d8 , d0, d4 FMAC_R1 d8 , d0, d4
FMAC_R2 d8 , d1, d5 FMAC_R2 d8 , d1, d5
FMAC_I1 d9 , d0, d5 FMAC_I1 d9 , d0, d5
FMAC_I2 d9 , d1, d4 FMAC_I2 d9 , d1, d4
fstmiad Y!, { d8 }
fstmiad Y!, { d9 }
vstmia.f64 Y!, { d8 }
vstmia.f64 Y!, { d9 }






@@ -293,14 +293,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X , { d4 - d5 }
fldmiad Y , { d8 - d9 }
vldmia.f64 X , { d4 - d5 }
vldmia.f64 Y , { d8 - d9 }


FMAC_R1 d8 , d0, d4 FMAC_R1 d8 , d0, d4
FMAC_R2 d8 , d1, d5 FMAC_R2 d8 , d1, d5
FMAC_I1 d9 , d0, d5 FMAC_I1 d9 , d0, d5
FMAC_I2 d9 , d1, d4 FMAC_I2 d9 , d1, d4
fstmiad Y , { d8 - d9 }
vstmia.f64 Y , { d8 - d9 }


add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
@@ -314,40 +314,40 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4 .macro KERNEL_F4


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmias X!, { s4 - s7 }
vldmia.f32 X!, { s4 - s7 }
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]
fldmias Y , { s8 - s11 }
vldmia.f32 Y , { s8 - s11 }


FMAC_R1 s8 , s0, s4 FMAC_R1 s8 , s0, s4
FMAC_R2 s8 , s1, s5 FMAC_R2 s8 , s1, s5
FMAC_I1 s9 , s0, s5 FMAC_I1 s9 , s0, s5
FMAC_I2 s9 , s1, s4 FMAC_I2 s9 , s1, s4
fstmias Y!, { s8 }
fstmias Y!, { s9 }
vstmia.f32 Y!, { s8 }
vstmia.f32 Y!, { s9 }


FMAC_R1 s10, s0, s6 FMAC_R1 s10, s0, s6
FMAC_R2 s10, s1, s7 FMAC_R2 s10, s1, s7
FMAC_I1 s11, s0, s7 FMAC_I1 s11, s0, s7
FMAC_I2 s11, s1, s6 FMAC_I2 s11, s1, s6
fstmias Y!, { s10 }
fstmias Y!, { s11 }
vstmia.f32 Y!, { s10 }
vstmia.f32 Y!, { s11 }


fldmias X!, { s4 - s7 }
fldmias Y , { s8 - s11 }
vldmia.f32 X!, { s4 - s7 }
vldmia.f32 Y , { s8 - s11 }


FMAC_R1 s8 , s0, s4 FMAC_R1 s8 , s0, s4
FMAC_R2 s8 , s1, s5 FMAC_R2 s8 , s1, s5
FMAC_I1 s9 , s0, s5 FMAC_I1 s9 , s0, s5
FMAC_I2 s9 , s1, s4 FMAC_I2 s9 , s1, s4
fstmias Y!, { s8 }
fstmias Y!, { s9 }
vstmia.f32 Y!, { s8 }
vstmia.f32 Y!, { s9 }


FMAC_R1 s10, s0, s6 FMAC_R1 s10, s0, s6
FMAC_R2 s10, s1, s7 FMAC_R2 s10, s1, s7
FMAC_I1 s11, s0, s7 FMAC_I1 s11, s0, s7
FMAC_I2 s11, s1, s6 FMAC_I2 s11, s1, s6
fstmias Y!, { s10 }
fstmias Y!, { s11 }
vstmia.f32 Y!, { s10 }
vstmia.f32 Y!, { s11 }






@@ -358,15 +358,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 - s5 }
fldmias Y , { s8 - s9 }
vldmia.f32 X!, { s4 - s5 }
vldmia.f32 Y , { s8 - s9 }


FMAC_R1 s8 , s0, s4 FMAC_R1 s8 , s0, s4
FMAC_R2 s8 , s1, s5 FMAC_R2 s8 , s1, s5
FMAC_I1 s9 , s0, s5 FMAC_I1 s9 , s0, s5
FMAC_I2 s9 , s1, s4 FMAC_I2 s9 , s1, s4
fstmias Y!, { s8 }
fstmias Y!, { s9 }
vstmia.f32 Y!, { s8 }
vstmia.f32 Y!, { s9 }






@@ -374,14 +374,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X , { s4 - s5 }
fldmias Y , { s8 - s9 }
vldmia.f32 X , { s4 - s5 }
vldmia.f32 Y , { s8 - s9 }


FMAC_R1 s8 , s0, s4 FMAC_R1 s8 , s0, s4
FMAC_R2 s8 , s1, s5 FMAC_R2 s8 , s1, s5
FMAC_I1 s9 , s0, s5 FMAC_I1 s9 , s0, s5
FMAC_I2 s9 , s1, s4 FMAC_I2 s9 , s1, s4
fstmias Y , { s8 - s9 }
vstmia.f32 Y , { s8 - s9 }


add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


+ 14
- 14
kernel/arm/ccopy_vfp.S View File

@@ -65,15 +65,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_F4 .macro COPY_F4


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmias X!, { s0 - s7 }
fstmias Y!, { s0 - s7 }
vldmia.f32 X!, { s0 - s7 }
vstmia.f32 Y!, { s0 - s7 }


.endm .endm


.macro COPY_F1 .macro COPY_F1


fldmias X!, { s0 - s1 }
fstmias Y!, { s0 - s1 }
vldmia.f32 X!, { s0 - s1 }
vstmia.f32 Y!, { s0 - s1 }


.endm .endm


@@ -83,23 +83,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_S4 .macro COPY_S4


nop nop
fldmias X, { s0 - s1 }
fstmias Y, { s0 - s1 }
vldmia.f32 X, { s0 - s1 }
vstmia.f32 Y, { s0 - s1 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s2 - s3 }
fstmias Y, { s2 - s3 }
vldmia.f32 X, { s2 - s3 }
vstmia.f32 Y, { s2 - s3 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s0 - s1 }
fstmias Y, { s0 - s1 }
vldmia.f32 X, { s0 - s1 }
vstmia.f32 Y, { s0 - s1 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s2 - s3 }
fstmias Y, { s2 - s3 }
vldmia.f32 X, { s2 - s3 }
vstmia.f32 Y, { s2 - s3 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


@@ -108,8 +108,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY_S1 .macro COPY_S1


fldmias X, { s0 - s1 }
fstmias Y, { s0 - s1 }
vldmia.f32 X, { s0 - s1 }
vstmia.f32 Y, { s0 - s1 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y




+ 20
- 20
kernel/arm/cdot_vfp.S View File

@@ -76,30 +76,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]


fldmias X!, { s4 - s5 }
fldmias Y!, { s8 - s9 }
vldmia.f32 X!, { s4 - s5 }
vldmia.f32 Y!, { s8 - s9 }
fmacs s0 , s4, s8 fmacs s0 , s4, s8
fmacs s1 , s4, s9 fmacs s1 , s4, s9
fldmias X!, { s6 - s7 }
vldmia.f32 X!, { s6 - s7 }
fmacs s2 , s5, s9 fmacs s2 , s5, s9
fmacs s3 , s5, s8 fmacs s3 , s5, s8


fldmias Y!, { s10 - s11 }
vldmia.f32 Y!, { s10 - s11 }
fmacs s0 , s6, s10 fmacs s0 , s6, s10
fmacs s1 , s6, s11 fmacs s1 , s6, s11
fmacs s2 , s7, s11 fmacs s2 , s7, s11
fmacs s3 , s7, s10 fmacs s3 , s7, s10




fldmias X!, { s4 - s5 }
fldmias Y!, { s8 - s9 }
vldmia.f32 X!, { s4 - s5 }
vldmia.f32 Y!, { s8 - s9 }
fmacs s0 , s4, s8 fmacs s0 , s4, s8
fmacs s1 , s4, s9 fmacs s1 , s4, s9
fldmias X!, { s6 - s7 }
vldmia.f32 X!, { s6 - s7 }
fmacs s2 , s5, s9 fmacs s2 , s5, s9
fmacs s3 , s5, s8 fmacs s3 , s5, s8


fldmias Y!, { s10 - s11 }
vldmia.f32 Y!, { s10 - s11 }
fmacs s0 , s6, s10 fmacs s0 , s6, s10
fmacs s1 , s6, s11 fmacs s1 , s6, s11
fmacs s2 , s7, s11 fmacs s2 , s7, s11
@@ -109,8 +109,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 - s5 }
fldmias Y!, { s8 - s9 }
vldmia.f32 X!, { s4 - s5 }
vldmia.f32 Y!, { s8 - s9 }
fmacs s0 , s4, s8 fmacs s0 , s4, s8
fmacs s1 , s4, s9 fmacs s1 , s4, s9
fmacs s2 , s5, s9 fmacs s2 , s5, s9
@@ -125,8 +125,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


nop nop


fldmias X, { s4 - s5 }
fldmias Y, { s8 - s9 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s8 - s9 }
fmacs s0 , s4, s8 fmacs s0 , s4, s8
fmacs s1 , s4, s9 fmacs s1 , s4, s9
fmacs s2 , s5, s9 fmacs s2 , s5, s9
@@ -134,8 +134,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s4 - s5 }
fldmias Y, { s8 - s9 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s8 - s9 }
fmacs s0 , s4, s8 fmacs s0 , s4, s8
fmacs s1 , s4, s9 fmacs s1 , s4, s9
fmacs s2 , s5, s9 fmacs s2 , s5, s9
@@ -143,8 +143,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s4 - s5 }
fldmias Y, { s8 - s9 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s8 - s9 }
fmacs s0 , s4, s8 fmacs s0 , s4, s8
fmacs s1 , s4, s9 fmacs s1 , s4, s9
fmacs s2 , s5, s9 fmacs s2 , s5, s9
@@ -152,8 +152,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s4 - s5 }
fldmias Y, { s8 - s9 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s8 - s9 }
fmacs s0 , s4, s8 fmacs s0 , s4, s8
fmacs s1 , s4, s9 fmacs s1 , s4, s9
fmacs s2 , s5, s9 fmacs s2 , s5, s9
@@ -166,8 +166,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 - s5 }
fldmias Y, { s8 - s9 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s8 - s9 }
fmacs s0 , s4, s8 fmacs s0 , s4, s8
fmacs s1 , s4, s9 fmacs s1 , s4, s9
fmacs s2 , s5, s9 fmacs s2 , s5, s9


+ 22
- 22
kernel/arm/cgemm_kernel_2x2_vfp.S View File

@@ -165,9 +165,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_I .macro KERNEL2x2_I


pld [ AO, #A_PRE ] pld [ AO, #A_PRE ]
fldmias AO!, { s0 - s3 }
vldmia.f32 AO!, { s0 - s3 }
pld [ BO, #B_PRE ] pld [ BO, #B_PRE ]
fldmias BO!, { s4 - s7 }
vldmia.f32 BO!, { s4 - s7 }




fmuls s8 , s0, s4 fmuls s8 , s0, s4
@@ -197,9 +197,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_M1 .macro KERNEL2x2_M1


pld [ AO, #A_PRE ] pld [ AO, #A_PRE ]
fldmias AO!, { s0 - s3 }
vldmia.f32 AO!, { s0 - s3 }
pld [ BO, #B_PRE ] pld [ BO, #B_PRE ]
fldmias BO!, { s4 - s7 }
vldmia.f32 BO!, { s4 - s7 }


fmacs s8 , s0, s4 fmacs s8 , s0, s4
fmacs s9 , s0, s5 fmacs s9 , s0, s5
@@ -225,8 +225,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL2x2_M2 .macro KERNEL2x2_M2


fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }


fmacs s8 , s0, s4 fmacs s8 , s0, s4
fmacs s9 , s0, s5 fmacs s9 , s0, s5
@@ -254,8 +254,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL2x2_E .macro KERNEL2x2_E


fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }


fmacs s8 , s0, s4 fmacs s8 , s0, s4
fmacs s9 , s0, s5 fmacs s9 , s0, s5
@@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL2x2_SUB .macro KERNEL2x2_SUB


fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }


fmacs s8 , s0, s4 fmacs s8 , s0, s4
fmacs s9 , s0, s5 fmacs s9 , s0, s5
@@ -317,7 +317,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias CO1, { s4 - s7 }
vldmia.f32 CO1, { s4 - s7 }


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
@@ -329,9 +329,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s11 FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10 FMAC_I2 s7 , s1 , s10


fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }


fldmias CO2, { s4 - s7 }
vldmia.f32 CO2, { s4 - s7 }


FMAC_R1 s4 , s0 , s12 FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13 FMAC_I1 s5 , s0 , s13
@@ -343,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s15 FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14 FMAC_I2 s7 , s1 , s14


fstmias CO2, { s4 - s7 }
vstmia.f32 CO2, { s4 - s7 }


add CO1, CO1, #16 add CO1, CO1, #16


@@ -500,23 +500,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias CO1, { s4 - s5 }
vldmia.f32 CO1, { s4 - s5 }


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
FMAC_R2 s4 , s1 , s9 FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8 FMAC_I2 s5 , s1 , s8


fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }


fldmias CO2, { s4 - s5 }
vldmia.f32 CO2, { s4 - s5 }


FMAC_R1 s4 , s0 , s12 FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13 FMAC_I1 s5 , s0 , s13
FMAC_R2 s4 , s1 , s13 FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12 FMAC_I2 s5 , s1 , s12


fstmias CO2, { s4 - s5 }
vstmia.f32 CO2, { s4 - s5 }


add CO1, CO1, #8 add CO1, CO1, #8


@@ -671,7 +671,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias CO1, { s4 - s7 }
vldmia.f32 CO1, { s4 - s7 }


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
@@ -683,7 +683,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s11 FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10 FMAC_I2 s7 , s1 , s10


fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }


add CO1, CO1, #16 add CO1, CO1, #16


@@ -800,14 +800,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias CO1, { s4 - s5 }
vldmia.f32 CO1, { s4 - s5 }


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
FMAC_R2 s4 , s1 , s9 FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8 FMAC_I2 s5 , s1 , s8


fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }


add CO1, CO1, #8 add CO1, CO1, #8




+ 32
- 32
kernel/arm/cgemm_kernel_2x2_vfpv3.S View File

@@ -182,30 +182,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_I .macro KERNEL2x2_I
pld [ AO , #A_PRE ] pld [ AO , #A_PRE ]
pld [ BO , #B_PRE ] pld [ BO , #B_PRE ]
fldmias AO!, { s0 - s1 }
fldmias BO!, { s8 - s9 }
vldmia.f32 AO!, { s0 - s1 }
vldmia.f32 BO!, { s8 - s9 }


fmuls s16 , s0, s8 fmuls s16 , s0, s8
fmuls s24 , s1, s9 fmuls s24 , s1, s9
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmuls s17 , s0, s9 fmuls s17 , s0, s9
fmuls s25 , s1, s8 fmuls s25 , s1, s8


fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmuls s18 , s2, s8 fmuls s18 , s2, s8
fmuls s26 , s3, s9 fmuls s26 , s3, s9
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmuls s19 , s2, s9 fmuls s19 , s2, s9
fmuls s27 , s3, s8 fmuls s27 , s3, s8


fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmuls s20 , s0, s10 fmuls s20 , s0, s10
fmuls s28 , s1, s11 fmuls s28 , s1, s11
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmuls s21 , s0, s11 fmuls s21 , s0, s11
fmuls s29 , s1, s10 fmuls s29 , s1, s10


fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmuls s22 , s2, s10 fmuls s22 , s2, s10
fmuls s30 , s3, s11 fmuls s30 , s3, s11
fmuls s23 , s2, s11 fmuls s23 , s2, s11
@@ -218,17 +218,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_M1 .macro KERNEL2x2_M1


fmacs s16 , s0, s8 fmacs s16 , s0, s8
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmacs s24 , s1, s9 fmacs s24 , s1, s9
fmacs s17 , s0, s9 fmacs s17 , s0, s9
fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmacs s25 , s1, s8 fmacs s25 , s1, s8


fmacs s18 , s2, s8 fmacs s18 , s2, s8
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmacs s26 , s3, s9 fmacs s26 , s3, s9
fmacs s19 , s2, s9 fmacs s19 , s2, s9
fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmacs s27 , s3, s8 fmacs s27 , s3, s8


fmacs s20 , s0, s10 fmacs s20 , s0, s10
@@ -250,19 +250,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ BO , #B_PRE ] pld [ BO , #B_PRE ]
fmacs s24 , s5, s13 fmacs s24 , s5, s13
fmacs s17 , s4, s13 fmacs s17 , s4, s13
fldmias AO!, { s0 - s1 }
vldmia.f32 AO!, { s0 - s1 }
fmacs s25 , s5, s12 fmacs s25 , s5, s12


fmacs s18 , s6, s12 fmacs s18 , s6, s12
fmacs s26 , s7, s13 fmacs s26 , s7, s13
fldmias BO!, { s8 - s9 }
vldmia.f32 BO!, { s8 - s9 }
fmacs s19 , s6, s13 fmacs s19 , s6, s13
fmacs s27 , s7, s12 fmacs s27 , s7, s12


fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmacs s20 , s4, s14 fmacs s20 , s4, s14
fmacs s28 , s5, s15 fmacs s28 , s5, s15
fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmacs s21 , s4, s15 fmacs s21 , s4, s15
fmacs s29 , s5, s14 fmacs s29 , s5, s14


@@ -300,16 +300,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL2x2_SUB .macro KERNEL2x2_SUB


fldmias AO!, { s0 - s1 }
fldmias BO!, { s8 - s9 }
vldmia.f32 AO!, { s0 - s1 }
vldmia.f32 BO!, { s8 - s9 }


fmacs s16 , s0, s8 fmacs s16 , s0, s8
fmacs s24 , s1, s9 fmacs s24 , s1, s9
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmacs s17 , s0, s9 fmacs s17 , s0, s9
fmacs s25 , s1, s8 fmacs s25 , s1, s8


fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmacs s18 , s2, s8 fmacs s18 , s2, s8
fmacs s26 , s3, s9 fmacs s26 , s3, s9
fmacs s19 , s2, s9 fmacs s19 , s2, s9
@@ -338,8 +338,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias CO1, { s4 - s7 }
fldmias CO2, { s8 - s11 }
vldmia.f32 CO1, { s4 - s7 }
vldmia.f32 CO2, { s8 - s11 }


FADD_R s16, s24 , s16 FADD_R s16, s24 , s16
FADD_I s17, s25 , s17 FADD_I s17, s25 , s17
@@ -370,8 +370,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s10, s1 , s23 FMAC_R2 s10, s1 , s23
FMAC_I2 s11, s1 , s22 FMAC_I2 s11, s1 , s22


fstmias CO1, { s4 - s7 }
fstmias CO2, { s8 - s11 }
vstmia.f32 CO1, { s4 - s7 }
vstmia.f32 CO2, { s8 - s11 }


add CO1, CO1, #16 add CO1, CO1, #16


@@ -534,8 +534,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias CO1, { s4 - s5 }
fldmias CO2, { s8 - s9 }
vldmia.f32 CO1, { s4 - s5 }
vldmia.f32 CO2, { s8 - s9 }


FADD_R s16, s24 , s16 FADD_R s16, s24 , s16
FADD_I s17, s25 , s17 FADD_I s17, s25 , s17
@@ -552,8 +552,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s8 , s1 , s21 FMAC_R2 s8 , s1 , s21
FMAC_I2 s9 , s1 , s20 FMAC_I2 s9 , s1 , s20


fstmias CO1, { s4 - s5 }
fstmias CO2, { s8 - s9 }
vstmia.f32 CO1, { s4 - s5 }
vstmia.f32 CO2, { s8 - s9 }


add CO1, CO1, #8 add CO1, CO1, #8


@@ -716,7 +716,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias CO1, { s4 - s7 }
vldmia.f32 CO1, { s4 - s7 }


FADD_R s16, s24 , s16 FADD_R s16, s24 , s16
FADD_I s17, s25 , s17 FADD_I s17, s25 , s17
@@ -733,7 +733,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s19 FMAC_R2 s6 , s1 , s19
FMAC_I2 s7 , s1 , s18 FMAC_I2 s7 , s1 , s18


fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }


add CO1, CO1, #16 add CO1, CO1, #16


@@ -851,7 +851,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias CO1, { s4 - s5 }
vldmia.f32 CO1, { s4 - s5 }


FADD_R s16, s24 , s16 FADD_R s16, s24 , s16
FADD_I s17, s25 , s17 FADD_I s17, s25 , s17
@@ -861,7 +861,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s4 , s1 , s17 FMAC_R2 s4 , s1 , s17
FMAC_I2 s5 , s1 , s16 FMAC_I2 s5 , s1 , s16


fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }


add CO1, CO1, #8 add CO1, CO1, #8




+ 10
- 10
kernel/arm/cgemm_tcopy_2_vfp.S View File

@@ -73,12 +73,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/ **************************************************************************************/
.macro COPY2x2 .macro COPY2x2


fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }


add r3, AO1, LDA add r3, AO1, LDA
fldmias r3, { s4 - s7 }
vldmia.f32 r3, { s4 - s7 }


fstmias BO1, { s0 - s7 }
vstmia.f32 BO1, { s0 - s7 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO1, BO1, M4 add BO1, BO1, M4


@@ -86,12 +86,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY1x2 .macro COPY1x2


fldmias AO1, { s0 -s1 }
vldmia.f32 AO1, { s0 -s1 }


add r3, AO1, LDA add r3, AO1, LDA
fldmias r3, { s2 - s3 }
vldmia.f32 r3, { s2 - s3 }


fstmias BO2, { s0 - s3 }
vstmia.f32 BO2, { s0 - s3 }
add AO1, AO1, #8 add AO1, AO1, #8
add BO2, BO2, #16 add BO2, BO2, #16


@@ -100,9 +100,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*************************************************************************************************************************/ /*************************************************************************************************************************/
.macro COPY2x1 .macro COPY2x1


fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }


fstmias BO1, { s0 - s3 }
vstmia.f32 BO1, { s0 - s3 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO1, BO1, M4 add BO1, BO1, M4


@@ -110,9 +110,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY1x1 .macro COPY1x1


fldmias AO1, { s0 - s1 }
vldmia.f32 AO1, { s0 - s1 }


fstmias BO2, { s0 - s1 }
vstmia.f32 BO2, { s0 - s1 }
add AO1, AO1, #8 add AO1, AO1, #8
add BO2, BO2, #8 add BO2, BO2, #8




+ 16
- 16
kernel/arm/cgemv_n_vfp.S View File

@@ -201,7 +201,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias YO, { s4 - s7 }
vldmia.f32 YO, { s4 - s7 }


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
@@ -213,9 +213,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s11 FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10 FMAC_I2 s7 , s1 , s10


fstmias YO!, { s4 - s7 }
vstmia.f32 YO!, { s4 - s7 }


fldmias YO, { s4 - s7 }
vldmia.f32 YO, { s4 - s7 }


FMAC_R1 s4 , s0 , s12 FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13 FMAC_I1 s5 , s0 , s13
@@ -227,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s15 FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14 FMAC_I2 s7 , s1 , s14


fstmias YO!, { s4 - s7 }
vstmia.f32 YO!, { s4 - s7 }


.endm .endm


@@ -266,14 +266,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
FMAC_R2 s4 , s1 , s9 FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8 FMAC_I2 s5 , s1 , s8


fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }


add YO, YO, #8 add YO, YO, #8


@@ -349,47 +349,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
FMAC_R2 s4 , s1 , s9 FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8 FMAC_I2 s5 , s1 , s8


fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }


add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s6 - s7 }
vldmia.f32 YO, { s6 - s7 }


FMAC_R1 s6 , s0 , s10 FMAC_R1 s6 , s0 , s10
FMAC_I1 s7 , s0 , s11 FMAC_I1 s7 , s0 , s11
FMAC_R2 s6 , s1 , s11 FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10 FMAC_I2 s7 , s1 , s10


fstmias YO, { s6 - s7 }
vstmia.f32 YO, { s6 - s7 }


add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }


FMAC_R1 s4 , s0 , s12 FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13 FMAC_I1 s5 , s0 , s13
FMAC_R2 s4 , s1 , s13 FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12 FMAC_I2 s5 , s1 , s12


fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }


add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s6 - s7 }
vldmia.f32 YO, { s6 - s7 }


FMAC_R1 s6 , s0 , s14 FMAC_R1 s6 , s0 , s14
FMAC_I1 s7 , s0 , s15 FMAC_I1 s7 , s0 , s15
FMAC_R2 s6 , s1 , s15 FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14 FMAC_I2 s7 , s1 , s14


fstmias YO, { s6 - s7 }
vstmia.f32 YO, { s6 - s7 }


add YO, YO, INC_Y add YO, YO, INC_Y


@@ -430,14 +430,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
FMAC_R2 s4 , s1 , s9 FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8 FMAC_I2 s5 , s1 , s8


fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }


add YO, YO, INC_Y add YO, YO, INC_Y




+ 20
- 20
kernel/arm/cgemv_t_vfp.S View File

@@ -150,9 +150,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F2X1 .macro KERNEL_F2X1


fldmias XO! , { s2 - s3 }
fldmias AO1!, { s4 - s5 }
fldmias AO2!, { s8 - s9 }
vldmia.f32 XO! , { s2 - s3 }
vldmia.f32 AO1!, { s4 - s5 }
vldmia.f32 AO2!, { s8 - s9 }


fmacs s12 , s4 , s2 fmacs s12 , s4 , s2
fmacs s13 , s4 , s3 fmacs s13 , s4 , s3
@@ -168,7 +168,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F2 .macro SAVE_F2


fldmias YO, { s4 - s7 }
vldmia.f32 YO, { s4 - s7 }


FMAC_R1 s4 , s0 , s12 FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13 FMAC_I1 s5 , s0 , s13
@@ -180,7 +180,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s15 FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14 FMAC_I2 s7 , s1 , s14


fstmias YO!, { s4 - s7 }
vstmia.f32 YO!, { s4 - s7 }


.endm .endm


@@ -204,8 +204,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X1 .macro KERNEL_F1X1


fldmias XO! , { s2 - s3 }
fldmias AO1!, { s4 - s5 }
vldmia.f32 XO! , { s2 - s3 }
vldmia.f32 AO1!, { s4 - s5 }


fmacs s12 , s4 , s2 fmacs s12 , s4 , s2
fmacs s13 , s4 , s3 fmacs s13 , s4 , s3
@@ -216,14 +216,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F1 .macro SAVE_F1


fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }


FMAC_R1 s4 , s0 , s12 FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13 FMAC_I1 s5 , s0 , s13
FMAC_R2 s4 , s1 , s13 FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12 FMAC_I2 s5 , s1 , s12


fstmias YO!, { s4 - s5 }
vstmia.f32 YO!, { s4 - s5 }


.endm .endm


@@ -249,9 +249,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S2X1 .macro KERNEL_S2X1


fldmias XO , { s2 - s3 }
fldmias AO1!, { s4 - s5 }
fldmias AO2!, { s8 - s9 }
vldmia.f32 XO , { s2 - s3 }
vldmia.f32 AO1!, { s4 - s5 }
vldmia.f32 AO2!, { s8 - s9 }


fmacs s12 , s4 , s2 fmacs s12 , s4 , s2
fmacs s13 , s4 , s3 fmacs s13 , s4 , s3
@@ -269,25 +269,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S2 .macro SAVE_S2


fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }


FMAC_R1 s4 , s0 , s12 FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13 FMAC_I1 s5 , s0 , s13
FMAC_R2 s4 , s1 , s13 FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12 FMAC_I2 s5 , s1 , s12


fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }


add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s6 - s7 }
vldmia.f32 YO, { s6 - s7 }


FMAC_R1 s6 , s0 , s14 FMAC_R1 s6 , s0 , s14
FMAC_I1 s7 , s0 , s15 FMAC_I1 s7 , s0 , s15
FMAC_R2 s6 , s1 , s15 FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14 FMAC_I2 s7 , s1 , s14


fstmias YO, { s6 - s7 }
vstmia.f32 YO, { s6 - s7 }


add YO, YO, INC_Y add YO, YO, INC_Y


@@ -313,8 +313,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X1 .macro KERNEL_S1X1


fldmias XO , { s2 - s3 }
fldmias AO1!, { s4 - s5 }
vldmia.f32 XO , { s2 - s3 }
vldmia.f32 AO1!, { s4 - s5 }


fmacs s12 , s4 , s2 fmacs s12 , s4 , s2
fmacs s13 , s4 , s3 fmacs s13 , s4 , s3
@@ -327,14 +327,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S1 .macro SAVE_S1


fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }


FMAC_R1 s4 , s0 , s12 FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13 FMAC_I1 s5 , s0 , s13
FMAC_R2 s4 , s1 , s13 FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12 FMAC_I2 s5 , s1 , s12


fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }


add YO, YO, INC_Y add YO, YO, INC_Y




+ 16
- 16
kernel/arm/ctrmm_kernel_2x2_vfp.S View File

@@ -165,9 +165,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_I .macro KERNEL2x2_I


pld [ AO, #A_PRE ] pld [ AO, #A_PRE ]
fldmias AO!, { s0 - s3 }
vldmia.f32 AO!, { s0 - s3 }
pld [ BO, #B_PRE ] pld [ BO, #B_PRE ]
fldmias BO!, { s4 - s7 }
vldmia.f32 BO!, { s4 - s7 }




fmuls s8 , s0, s4 fmuls s8 , s0, s4
@@ -197,9 +197,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_M1 .macro KERNEL2x2_M1


pld [ AO, #A_PRE ] pld [ AO, #A_PRE ]
fldmias AO!, { s0 - s3 }
vldmia.f32 AO!, { s0 - s3 }
pld [ BO, #B_PRE ] pld [ BO, #B_PRE ]
fldmias BO!, { s4 - s7 }
vldmia.f32 BO!, { s4 - s7 }


fmacs s8 , s0, s4 fmacs s8 , s0, s4
fmacs s9 , s0, s5 fmacs s9 , s0, s5
@@ -225,8 +225,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL2x2_M2 .macro KERNEL2x2_M2


fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }


fmacs s8 , s0, s4 fmacs s8 , s0, s4
fmacs s9 , s0, s5 fmacs s9 , s0, s5
@@ -254,8 +254,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL2x2_E .macro KERNEL2x2_E


fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }


fmacs s8 , s0, s4 fmacs s8 , s0, s4
fmacs s9 , s0, s5 fmacs s9 , s0, s5
@@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL2x2_SUB .macro KERNEL2x2_SUB


fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }


fmacs s8 , s0, s4 fmacs s8 , s0, s4
fmacs s9 , s0, s5 fmacs s9 , s0, s5
@@ -331,7 +331,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s11 FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10 FMAC_I2 s7 , s1 , s10


fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }


flds s4, FP_ZERO flds s4, FP_ZERO
vmov.f32 s5, s4 vmov.f32 s5, s4
@@ -348,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s15 FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14 FMAC_I2 s7 , s1 , s14


fstmias CO2, { s4 - s7 }
vstmia.f32 CO2, { s4 - s7 }


add CO1, CO1, #16 add CO1, CO1, #16


@@ -513,7 +513,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s4 , s1 , s9 FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8 FMAC_I2 s5 , s1 , s8


fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }


flds s4, FP_ZERO flds s4, FP_ZERO
vmov.f32 s5, s4 vmov.f32 s5, s4
@@ -523,7 +523,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s4 , s1 , s13 FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12 FMAC_I2 s5 , s1 , s12


fstmias CO2, { s4 - s5 }
vstmia.f32 CO2, { s4 - s5 }


add CO1, CO1, #8 add CO1, CO1, #8


@@ -693,7 +693,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s11 FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10 FMAC_I2 s7 , s1 , s10


fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }


add CO1, CO1, #16 add CO1, CO1, #16


@@ -818,7 +818,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s4 , s1 , s9 FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8 FMAC_I2 s5 , s1 , s8


fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }


add CO1, CO1, #8 add CO1, CO1, #8




+ 26
- 26
kernel/arm/ctrmm_kernel_2x2_vfpv3.S View File

@@ -170,30 +170,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_I .macro KERNEL2x2_I
pld [ AO , #A_PRE ] pld [ AO , #A_PRE ]
pld [ BO , #B_PRE ] pld [ BO , #B_PRE ]
fldmias AO!, { s0 - s1 }
fldmias BO!, { s8 - s9 }
vldmia.f32 AO!, { s0 - s1 }
vldmia.f32 BO!, { s8 - s9 }


fmuls s16 , s0, s8 fmuls s16 , s0, s8
fmuls s24 , s1, s9 fmuls s24 , s1, s9
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmuls s17 , s0, s9 fmuls s17 , s0, s9
fmuls s25 , s1, s8 fmuls s25 , s1, s8


fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmuls s18 , s2, s8 fmuls s18 , s2, s8
fmuls s26 , s3, s9 fmuls s26 , s3, s9
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmuls s19 , s2, s9 fmuls s19 , s2, s9
fmuls s27 , s3, s8 fmuls s27 , s3, s8


fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmuls s20 , s0, s10 fmuls s20 , s0, s10
fmuls s28 , s1, s11 fmuls s28 , s1, s11
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmuls s21 , s0, s11 fmuls s21 , s0, s11
fmuls s29 , s1, s10 fmuls s29 , s1, s10


fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmuls s22 , s2, s10 fmuls s22 , s2, s10
fmuls s30 , s3, s11 fmuls s30 , s3, s11
fmuls s23 , s2, s11 fmuls s23 , s2, s11
@@ -206,17 +206,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_M1 .macro KERNEL2x2_M1


fmacs s16 , s0, s8 fmacs s16 , s0, s8
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmacs s24 , s1, s9 fmacs s24 , s1, s9
fmacs s17 , s0, s9 fmacs s17 , s0, s9
fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmacs s25 , s1, s8 fmacs s25 , s1, s8


fmacs s18 , s2, s8 fmacs s18 , s2, s8
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmacs s26 , s3, s9 fmacs s26 , s3, s9
fmacs s19 , s2, s9 fmacs s19 , s2, s9
fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmacs s27 , s3, s8 fmacs s27 , s3, s8


fmacs s20 , s0, s10 fmacs s20 , s0, s10
@@ -238,19 +238,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ BO , #B_PRE ] pld [ BO , #B_PRE ]
fmacs s24 , s5, s13 fmacs s24 , s5, s13
fmacs s17 , s4, s13 fmacs s17 , s4, s13
fldmias AO!, { s0 - s1 }
vldmia.f32 AO!, { s0 - s1 }
fmacs s25 , s5, s12 fmacs s25 , s5, s12


fmacs s18 , s6, s12 fmacs s18 , s6, s12
fmacs s26 , s7, s13 fmacs s26 , s7, s13
fldmias BO!, { s8 - s9 }
vldmia.f32 BO!, { s8 - s9 }
fmacs s19 , s6, s13 fmacs s19 , s6, s13
fmacs s27 , s7, s12 fmacs s27 , s7, s12


fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmacs s20 , s4, s14 fmacs s20 , s4, s14
fmacs s28 , s5, s15 fmacs s28 , s5, s15
fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmacs s21 , s4, s15 fmacs s21 , s4, s15
fmacs s29 , s5, s14 fmacs s29 , s5, s14


@@ -288,16 +288,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL2x2_SUB .macro KERNEL2x2_SUB


fldmias AO!, { s0 - s1 }
fldmias BO!, { s8 - s9 }
vldmia.f32 AO!, { s0 - s1 }
vldmia.f32 BO!, { s8 - s9 }


fmacs s16 , s0, s8 fmacs s16 , s0, s8
fmacs s24 , s1, s9 fmacs s24 , s1, s9
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmacs s17 , s0, s9 fmacs s17 , s0, s9
fmacs s25 , s1, s8 fmacs s25 , s1, s8


fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmacs s18 , s2, s8 fmacs s18 , s2, s8
fmacs s26 , s3, s9 fmacs s26 , s3, s9
fmacs s19 , s2, s9 fmacs s19 , s2, s9
@@ -354,8 +354,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s10, s1 , s23 FMAC_R2 s10, s1 , s23
FMAC_I2 s11, s1 , s22 FMAC_I2 s11, s1 , s22


fstmias CO1, { s4 - s7 }
fstmias CO2, { s8 - s11 }
vstmia.f32 CO1, { s4 - s7 }
vstmia.f32 CO2, { s8 - s11 }


add CO1, CO1, #16 add CO1, CO1, #16


@@ -532,8 +532,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s8 , s1 , s21 FMAC_R2 s8 , s1 , s21
FMAC_I2 s9 , s1 , s20 FMAC_I2 s9 , s1 , s20


fstmias CO1, { s4 - s5 }
fstmias CO2, { s8 - s9 }
vstmia.f32 CO1, { s4 - s5 }
vstmia.f32 CO2, { s8 - s9 }


add CO1, CO1, #8 add CO1, CO1, #8


@@ -710,7 +710,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s19 FMAC_R2 s6 , s1 , s19
FMAC_I2 s7 , s1 , s18 FMAC_I2 s7 , s1 , s18


fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }


add CO1, CO1, #16 add CO1, CO1, #16


@@ -835,7 +835,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s4 , s1 , s17 FMAC_R2 s4 , s1 , s17
FMAC_I2 s5 , s1 , s16 FMAC_I2 s5 , s1 , s16


fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }


add CO1, CO1, #8 add CO1, CO1, #8




+ 14
- 14
kernel/arm/dcopy_vfp.S View File

@@ -65,15 +65,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_F4 .macro COPY_F4


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmiad X!, { d0 - d3 }
fstmiad Y!, { d0 - d3 }
vldmia.f64 X!, { d0 - d3 }
vstmia.f64 Y!, { d0 - d3 }


.endm .endm


.macro COPY_F1 .macro COPY_F1


fldmiad X!, { d0 }
fstmiad Y!, { d0 }
vldmia.f64 X!, { d0 }
vstmia.f64 Y!, { d0 }


.endm .endm


@@ -83,23 +83,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_S4 .macro COPY_S4


nop nop
fldmiad X, { d0 }
fstmiad Y, { d0 }
vldmia.f64 X, { d0 }
vstmia.f64 Y, { d0 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmiad X, { d1 }
fstmiad Y, { d1 }
vldmia.f64 X, { d1 }
vstmia.f64 Y, { d1 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmiad X, { d0 }
fstmiad Y, { d0 }
vldmia.f64 X, { d0 }
vstmia.f64 Y, { d0 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmiad X, { d1 }
fstmiad Y, { d1 }
vldmia.f64 X, { d1 }
vstmia.f64 Y, { d1 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


@@ -108,8 +108,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY_S1 .macro COPY_S1


fldmiad X, { d0 }
fstmiad Y, { d0 }
vldmia.f64 X, { d0 }
vstmia.f64 Y, { d0 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y




+ 20
- 20
kernel/arm/ddot_vfp.S View File

@@ -67,26 +67,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4 .macro KERNEL_F4


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmiad X!, { d8 }
vldmia.f64 X!, { d8 }
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]
fldmiad Y!, { d4 }
fldmiad Y!, { d5 }
vldmia.f64 Y!, { d4 }
vldmia.f64 Y!, { d5 }
fmacd d0 , d4, d8 fmacd d0 , d4, d8
fldmiad X!, { d9 }
fldmiad Y!, { d6 }
vldmia.f64 X!, { d9 }
vldmia.f64 Y!, { d6 }
fmacd d1 , d5, d9 fmacd d1 , d5, d9
fldmiad X!, { d10 }
fldmiad X!, { d11 }
vldmia.f64 X!, { d10 }
vldmia.f64 X!, { d11 }
fmacd d0 , d6, d10 fmacd d0 , d6, d10
fldmiad Y!, { d7 }
vldmia.f64 Y!, { d7 }
fmacd d1 , d7, d11 fmacd d1 , d7, d11


.endm .endm


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 }
fldmiad Y!, { d8 }
vldmia.f64 X!, { d4 }
vldmia.f64 Y!, { d8 }
fmacd d0 , d4, d8 fmacd d0 , d4, d8


.endm .endm
@@ -97,26 +97,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S4 .macro KERNEL_S4


nop nop
fldmiad X, { d4 }
fldmiad Y, { d8 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d8 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
fmacd d0 , d4, d8 fmacd d0 , d4, d8


fldmiad X, { d5 }
fldmiad Y, { d9 }
vldmia.f64 X, { d5 }
vldmia.f64 Y, { d9 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
fmacd d1 , d5, d9 fmacd d1 , d5, d9


fldmiad X, { d6 }
fldmiad Y, { d10 }
vldmia.f64 X, { d6 }
vldmia.f64 Y, { d10 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
fmacd d0 , d6, d10 fmacd d0 , d6, d10


fldmiad X, { d7 }
fldmiad Y, { d11 }
vldmia.f64 X, { d7 }
vldmia.f64 Y, { d11 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
fmacd d1 , d7, d11 fmacd d1 , d7, d11
@@ -126,8 +126,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 }
fldmiad Y, { d8 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d8 }
add X, X, INC_X add X, X, INC_X
fmacd d0 , d4, d8 fmacd d0 , d4, d8
add Y, Y, INC_Y add Y, Y, INC_Y


+ 4
- 4
kernel/arm/dgemm_kernel_4x4_vfpv3.S View File

@@ -331,7 +331,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add r4 , CO2, r3 add r4 , CO2, r3
pld [ CO2 , #C_PRE ] pld [ CO2 , #C_PRE ]


fldmiad CO1, { d8 - d11 }
vldmia.f64 CO1, { d8 - d11 }
pld [ r4 , #C_PRE ] pld [ r4 , #C_PRE ]


fmacd d8 , d0 , d16 fmacd d8 , d0 , d16
@@ -352,7 +352,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmacd d15, d0 , d23 fmacd d15, d0 , d23
fstd d11, [CO1, #24 ] fstd d11, [CO1, #24 ]


fldmiad r4, { d8 - d11 }
vldmia.f64 r4, { d8 - d11 }


fmacd d8 , d0 , d24 fmacd d8 , d0 , d24
fstd d12, [CO2] fstd d12, [CO2]
@@ -367,7 +367,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ CO2 , #C_PRE ] pld [ CO2 , #C_PRE ]


fldmiad CO2, { d12 - d15 }
vldmia.f64 CO2, { d12 - d15 }


fstd d8 , [r4 ] fstd d8 , [r4 ]
fmacd d12, d0 , d28 fmacd d12, d0 , d28
@@ -378,7 +378,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fstd d11, [r4 , #24 ] fstd d11, [r4 , #24 ]
fmacd d15, d0 , d31 fmacd d15, d0 , d31


fstmiad CO2, { d12 - d15 }
vstmia.f64 CO2, { d12 - d15 }


add CO1, CO1, #32 add CO1, CO1, #32




+ 30
- 30
kernel/arm/dgemm_tcopy_4_vfp.S View File

@@ -76,21 +76,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY4x4 .macro COPY4x4


pld [ AO1, #A_PRE ] pld [ AO1, #A_PRE ]
fldmiad AO1, { d0 - d3 }
vldmia.f64 AO1, { d0 - d3 }


add r3, AO1, LDA add r3, AO1, LDA
pld [ r3, #A_PRE ] pld [ r3, #A_PRE ]
fldmiad r3, { d4 - d7 }
vldmia.f64 r3, { d4 - d7 }


add r3, r3, LDA add r3, r3, LDA
pld [ r3, #A_PRE ] pld [ r3, #A_PRE ]
fldmiad r3, { d8 - d11 }
vldmia.f64 r3, { d8 - d11 }


add r3, r3, LDA add r3, r3, LDA
pld [ r3, #A_PRE ] pld [ r3, #A_PRE ]
fldmiad r3, { d12 - d15 }
vldmia.f64 r3, { d12 - d15 }


fstmiad BO1, { d0 - d15 }
vstmia.f64 BO1, { d0 - d15 }
add AO1, AO1, #32 add AO1, AO1, #32
add BO1, BO1, M4 add BO1, BO1, M4


@@ -98,18 +98,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY2x4 .macro COPY2x4


fldmiad AO1, { d0 - d1 }
vldmia.f64 AO1, { d0 - d1 }


add r3, AO1, LDA add r3, AO1, LDA
fldmiad r3, { d2 - d3 }
vldmia.f64 r3, { d2 - d3 }


add r3, r3, LDA add r3, r3, LDA
fldmiad r3, { d4 - d5 }
vldmia.f64 r3, { d4 - d5 }


add r3, r3, LDA add r3, r3, LDA
fldmiad r3, { d6 - d7 }
vldmia.f64 r3, { d6 - d7 }


fstmiad BO2, { d0 - d7 }
vstmia.f64 BO2, { d0 - d7 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO2, BO2, #64 add BO2, BO2, #64


@@ -117,18 +117,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY1x4 .macro COPY1x4


fldmiad AO1, { d0 }
vldmia.f64 AO1, { d0 }


add r3, AO1, LDA add r3, AO1, LDA
fldmiad r3, { d1 }
vldmia.f64 r3, { d1 }


add r3, r3, LDA add r3, r3, LDA
fldmiad r3, { d2 }
vldmia.f64 r3, { d2 }


add r3, r3, LDA add r3, r3, LDA
fldmiad r3, { d3 }
vldmia.f64 r3, { d3 }


fstmiad BO3, { d0 - d3 }
vstmia.f64 BO3, { d0 - d3 }
add AO1, AO1, #8 add AO1, AO1, #8
add BO3, BO3, #32 add BO3, BO3, #32


@@ -139,13 +139,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY4x2 .macro COPY4x2


pld [ AO1, #A_PRE ] pld [ AO1, #A_PRE ]
fldmiad AO1, { d0 - d3 }
vldmia.f64 AO1, { d0 - d3 }


add r3, AO1, LDA add r3, AO1, LDA
pld [ r3, #A_PRE ] pld [ r3, #A_PRE ]
fldmiad r3, { d4 - d7 }
vldmia.f64 r3, { d4 - d7 }


fstmiad BO1, { d0 - d7 }
vstmia.f64 BO1, { d0 - d7 }
add AO1, AO1, #32 add AO1, AO1, #32
add BO1, BO1, M4 add BO1, BO1, M4


@@ -153,12 +153,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY2x2 .macro COPY2x2


fldmiad AO1, { d0 - d1 }
vldmia.f64 AO1, { d0 - d1 }


add r3, AO1, LDA add r3, AO1, LDA
fldmiad r3, { d2 - d3 }
vldmia.f64 r3, { d2 - d3 }


fstmiad BO2, { d0 - d3 }
vstmia.f64 BO2, { d0 - d3 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO2, BO2, #32 add BO2, BO2, #32


@@ -166,12 +166,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY1x2 .macro COPY1x2


fldmiad AO1, { d0 }
vldmia.f64 AO1, { d0 }


add r3, AO1, LDA add r3, AO1, LDA
fldmiad r3, { d1 }
vldmia.f64 r3, { d1 }


fstmiad BO3, { d0 - d1 }
vstmia.f64 BO3, { d0 - d1 }
add AO1, AO1, #8 add AO1, AO1, #8
add BO3, BO3, #16 add BO3, BO3, #16


@@ -182,9 +182,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY4x1 .macro COPY4x1


pld [ AO1, #A_PRE ] pld [ AO1, #A_PRE ]
fldmiad AO1, { d0 - d3 }
vldmia.f64 AO1, { d0 - d3 }


fstmiad BO1, { d0 - d3 }
vstmia.f64 BO1, { d0 - d3 }
add AO1, AO1, #32 add AO1, AO1, #32
add BO1, BO1, M4 add BO1, BO1, M4


@@ -192,9 +192,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY2x1 .macro COPY2x1


fldmiad AO1, { d0 - d1 }
vldmia.f64 AO1, { d0 - d1 }


fstmiad BO2, { d0 - d1 }
vstmia.f64 BO2, { d0 - d1 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO2, BO2, #16 add BO2, BO2, #16


@@ -202,9 +202,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY1x1 .macro COPY1x1


fldmiad AO1, { d0 }
vldmia.f64 AO1, { d0 }


fstmiad BO3, { d0 }
vstmia.f64 BO3, { d0 }
add AO1, AO1, #8 add AO1, AO1, #8
add BO3, BO3, #8 add BO3, BO3, #8




+ 13
- 13
kernel/arm/dtrmm_kernel_4x4_vfpv3.S View File

@@ -128,10 +128,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d8 , [ BO ] fldd d8 , [ BO ]


pld [ AO , #A_PRE ] pld [ AO , #A_PRE ]
fldmiad AO!, { d0 - d1}
vldmia.f64 AO!, { d0 - d1}


fmuld d16 , d0, d8 fmuld d16 , d0, d8
fldmiad AO!, { d2 - d3}
vldmia.f64 AO!, { d2 - d3}
fmuld d17 , d1, d8 fmuld d17 , d1, d8
fldd d9 , [ BO, #8 ] fldd d9 , [ BO, #8 ]
fmuld d18 , d2, d8 fmuld d18 , d2, d8
@@ -148,10 +148,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmuld d23 , d3, d9 fmuld d23 , d3, d9


fmuld d24 , d0, d10 fmuld d24 , d0, d10
fldmiad AO!, { d4 - d5 }
vldmia.f64 AO!, { d4 - d5 }
fmuld d25 , d1, d10 fmuld d25 , d1, d10
fmuld d26 , d2, d10 fmuld d26 , d2, d10
fldmiad AO!, { d6 - d7 }
vldmia.f64 AO!, { d6 - d7 }
fmuld d27 , d3, d10 fmuld d27 , d3, d10


fldd d13, [ BO, #8 ] fldd d13, [ BO, #8 ]
@@ -173,10 +173,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d8 , [ BO ] fldd d8 , [ BO ]


pld [ AO , #A_PRE ] pld [ AO , #A_PRE ]
fldmiad AO!, { d0 - d1}
vldmia.f64 AO!, { d0 - d1}


fmacd d16 , d0, d8 fmacd d16 , d0, d8
fldmiad AO!, { d2 - d3}
vldmia.f64 AO!, { d2 - d3}
fmacd d17 , d1, d8 fmacd d17 , d1, d8
fldd d9 , [ BO, #8 ] fldd d9 , [ BO, #8 ]
fmacd d18 , d2, d8 fmacd d18 , d2, d8
@@ -193,10 +193,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmacd d23 , d3, d9 fmacd d23 , d3, d9


fmacd d24 , d0, d10 fmacd d24 , d0, d10
fldmiad AO!, { d4 - d5 }
vldmia.f64 AO!, { d4 - d5 }
fmacd d25 , d1, d10 fmacd d25 , d1, d10
fmacd d26 , d2, d10 fmacd d26 , d2, d10
fldmiad AO!, { d6 - d7 }
vldmia.f64 AO!, { d6 - d7 }
fmacd d27 , d3, d10 fmacd d27 , d3, d10


fldd d13, [ BO, #8 ] fldd d13, [ BO, #8 ]
@@ -225,11 +225,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d8 , [ BO ] fldd d8 , [ BO ]
fmacd d21 , d5, d13 fmacd d21 , d5, d13
fmacd d22 , d6, d13 fmacd d22 , d6, d13
fldmiad AO!, { d0 - d1 }
vldmia.f64 AO!, { d0 - d1 }
fmacd d23 , d7, d13 fmacd d23 , d7, d13


fmacd d24 , d4, d14 fmacd d24 , d4, d14
fldmiad AO!, { d2 - d3 }
vldmia.f64 AO!, { d2 - d3 }
fmacd d25 , d5, d14 fmacd d25 , d5, d14
fldd d9 , [ BO, #8 ] fldd d9 , [ BO, #8 ]
fmacd d26 , d6, d14 fmacd d26 , d6, d14
@@ -257,10 +257,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmacd d19 , d3, d8 fmacd d19 , d3, d8


fmacd d20 , d0, d9 fmacd d20 , d0, d9
fldmiad AO!, { d4 - d5 }
vldmia.f64 AO!, { d4 - d5 }
fmacd d21 , d1, d9 fmacd d21 , d1, d9
fmacd d22 , d2, d9 fmacd d22 , d2, d9
fldmiad AO!, { d6 - d7 }
vldmia.f64 AO!, { d6 - d7 }
fmacd d23 , d3, d9 fmacd d23 , d3, d9


fmacd d24 , d0, d10 fmacd d24 , d0, d10
@@ -390,7 +390,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fstd d11, [r4 , #24 ] fstd d11, [r4 , #24 ]
fmuld d15, d0 , d31 fmuld d15, d0 , d31


fstmiad CO2, { d12 - d15 }
vstmia.f64 CO2, { d12 - d15 }


add CO1, CO1, #32 add CO1, CO1, #32




+ 50
- 50
kernel/arm/gemv_n_vfp.S View File

@@ -139,8 +139,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F8X1 .macro KERNEL_F8X1


pld [ AO2 , #A_PRE ] pld [ AO2 , #A_PRE ]
fldmiad XO! , { d2 }
fldmiad AO1 , { d4 - d7 }
vldmia.f64 XO! , { d2 }
vldmia.f64 AO1 , { d4 - d7 }


vmla.f64 d8 , d2 , d4 vmla.f64 d8 , d2 , d4
pld [ AO2 , #4*SIZE ] pld [ AO2 , #4*SIZE ]
@@ -150,7 +150,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmla.f64 d11 , d2 , d7 vmla.f64 d11 , d2 , d7




fldmiad r3 , { d4 - d7 }
vldmia.f64 r3 , { d4 - d7 }


vmla.f64 d12 , d2 , d4 vmla.f64 d12 , d2 , d4
vmla.f64 d13 , d2 , d5 vmla.f64 d13 , d2 , d5
@@ -164,23 +164,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F8 .macro SAVE_F8


fldmiad YO, { d4 - d7 }
vldmia.f64 YO, { d4 - d7 }


vmla.f64 d4 , d0, d8 vmla.f64 d4 , d0, d8
vmla.f64 d5 , d0, d9 vmla.f64 d5 , d0, d9
vmla.f64 d6 , d0, d10 vmla.f64 d6 , d0, d10
vmla.f64 d7 , d0, d11 vmla.f64 d7 , d0, d11


fstmiad YO!, { d4 - d7 }
vstmia.f64 YO!, { d4 - d7 }


fldmiad YO, { d4 - d7 }
vldmia.f64 YO, { d4 - d7 }


vmla.f64 d4 , d0, d12 vmla.f64 d4 , d0, d12
vmla.f64 d5 , d0, d13 vmla.f64 d5 , d0, d13
vmla.f64 d6 , d0, d14 vmla.f64 d6 , d0, d14
vmla.f64 d7 , d0, d15 vmla.f64 d7 , d0, d15


fstmiad YO!, { d4 - d7 }
vstmia.f64 YO!, { d4 - d7 }


.endm .endm


@@ -195,8 +195,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X1 .macro KERNEL_F1X1


fldmiad XO! , { d2 }
fldmiad AO1 , { d8 }
vldmia.f64 XO! , { d2 }
vldmia.f64 AO1 , { d8 }
vmla.f64 d12 , d2 , d8 vmla.f64 d12 , d2 , d8
add AO1, AO1, LDA add AO1, AO1, LDA


@@ -204,9 +204,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F1 .macro SAVE_F1


fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4, d0, d12 vmla.f64 d4, d0, d12
fstmiad YO!, { d4 }
vstmia.f64 YO!, { d4 }


.endm .endm


@@ -234,8 +234,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S4X1 .macro KERNEL_S4X1


pld [ AO2 , #A_PRE ] pld [ AO2 , #A_PRE ]
fldmiad XO , { d2 }
fldmiad AO1 , { d8 - d11 }
vldmia.f64 XO , { d2 }
vldmia.f64 AO1 , { d8 - d11 }


vmla.f64 d12 , d2 , d8 vmla.f64 d12 , d2 , d8
add AO1, AO1, LDA add AO1, AO1, LDA
@@ -249,24 +249,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S4 .macro SAVE_S4


fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4 , d0, d12 vmla.f64 d4 , d0, d12
fstmiad YO, { d4 }
vstmia.f64 YO, { d4 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d5 }
vldmia.f64 YO, { d5 }
vmla.f64 d5 , d0, d13 vmla.f64 d5 , d0, d13
fstmiad YO, { d5 }
vstmia.f64 YO, { d5 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4 , d0, d14 vmla.f64 d4 , d0, d14
fstmiad YO, { d4 }
vstmia.f64 YO, { d4 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d5 }
vldmia.f64 YO, { d5 }
vmla.f64 d5 , d0, d15 vmla.f64 d5 , d0, d15
fstmiad YO, { d5 }
vstmia.f64 YO, { d5 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X1 .macro KERNEL_S1X1


fldmiad XO , { d2 }
fldmiad AO1 , { d8 }
vldmia.f64 XO , { d2 }
vldmia.f64 AO1 , { d8 }
vmla.f64 d12 , d2 , d8 vmla.f64 d12 , d2 , d8
add AO1, AO1, LDA add AO1, AO1, LDA
add XO, XO , INC_X add XO, XO , INC_X
@@ -292,9 +292,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S1 .macro SAVE_S1


fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4, d0, d12 vmla.f64 d4, d0, d12
fstmiad YO , { d4 }
vstmia.f64 YO , { d4 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -338,8 +338,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F8X1 .macro KERNEL_F8X1


pld [ AO2, #A_PRE ] pld [ AO2, #A_PRE ]
fldmias XO! , { s2 }
fldmias AO1 , { s4 - s7 }
vldmia.f32 XO! , { s2 }
vldmia.f32 AO1 , { s4 - s7 }


vmla.f32 s8 , s2 , s4 vmla.f32 s8 , s2 , s4
vmla.f32 s9 , s2 , s5 vmla.f32 s9 , s2 , s5
@@ -348,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


add r3, AO1, #4*SIZE add r3, AO1, #4*SIZE


fldmias r3 , { s4 - s7 }
vldmia.f32 r3 , { s4 - s7 }


vmla.f32 s12 , s2 , s4 vmla.f32 s12 , s2 , s4
vmla.f32 s13 , s2 , s5 vmla.f32 s13 , s2 , s5
@@ -362,24 +362,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F8 .macro SAVE_F8


fldmias YO, { s4 - s7 }
vldmia.f32 YO, { s4 - s7 }


vmla.f32 s4 , s0, s8 vmla.f32 s4 , s0, s8
vmla.f32 s5 , s0, s9 vmla.f32 s5 , s0, s9
vmla.f32 s6 , s0, s10 vmla.f32 s6 , s0, s10
vmla.f32 s7 , s0, s11 vmla.f32 s7 , s0, s11


fstmias YO!, { s4 - s7 }
vstmia.f32 YO!, { s4 - s7 }




fldmias YO, { s4 - s7 }
vldmia.f32 YO, { s4 - s7 }


vmla.f32 s4 , s0, s12 vmla.f32 s4 , s0, s12
vmla.f32 s5 , s0, s13 vmla.f32 s5 , s0, s13
vmla.f32 s6 , s0, s14 vmla.f32 s6 , s0, s14
vmla.f32 s7 , s0, s15 vmla.f32 s7 , s0, s15


fstmias YO!, { s4 - s7 }
vstmia.f32 YO!, { s4 - s7 }


.endm .endm


@@ -394,8 +394,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X1 .macro KERNEL_F1X1


fldmias XO! , { s2 }
fldmias AO1 , { s8 }
vldmia.f32 XO! , { s2 }
vldmia.f32 AO1 , { s8 }
vmla.f32 s12 , s2 , s8 vmla.f32 s12 , s2 , s8
add AO1, AO1, LDA add AO1, AO1, LDA


@@ -403,9 +403,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F1 .macro SAVE_F1


fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4, s0, s12 vmla.f32 s4, s0, s12
fstmias YO!, { s4 }
vstmia.f32 YO!, { s4 }


.endm .endm


@@ -434,8 +434,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S4X1 .macro KERNEL_S4X1


fldmias XO , { s2 }
fldmias AO1 , { s8 - s11 }
vldmia.f32 XO , { s2 }
vldmia.f32 AO1 , { s8 - s11 }


vmla.f32 s12 , s2 , s8 vmla.f32 s12 , s2 , s8
vmla.f32 s13 , s2 , s9 vmla.f32 s13 , s2 , s9
@@ -449,24 +449,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S4 .macro SAVE_S4


fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4 , s0, s12 vmla.f32 s4 , s0, s12
fstmias YO, { s4 }
vstmia.f32 YO, { s4 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s5 }
vldmia.f32 YO, { s5 }
vmla.f32 s5 , s0, s13 vmla.f32 s5 , s0, s13
fstmias YO, { s5 }
vstmia.f32 YO, { s5 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4 , s0, s14 vmla.f32 s4 , s0, s14
fstmias YO, { s4 }
vstmia.f32 YO, { s4 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s5 }
vldmia.f32 YO, { s5 }
vmla.f32 s5 , s0, s15 vmla.f32 s5 , s0, s15
fstmias YO, { s5 }
vstmia.f32 YO, { s5 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -482,8 +482,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X1 .macro KERNEL_S1X1


fldmias XO , { s2 }
fldmias AO1 , { s8 }
vldmia.f32 XO , { s2 }
vldmia.f32 AO1 , { s8 }
vmla.f32 s12 , s2 , s8 vmla.f32 s12 , s2 , s8
add AO1, AO1, LDA add AO1, AO1, LDA
add XO, XO , INC_X add XO, XO , INC_X
@@ -492,9 +492,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S1 .macro SAVE_S1


fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4, s0, s12 vmla.f32 s4, s0, s12
fstmias YO , { s4 }
vstmia.f32 YO , { s4 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm


+ 60
- 60
kernel/arm/gemv_n_vfpv3.S View File

@@ -138,8 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F8X1 .macro KERNEL_F8X1


fldmiad XO! , { d4 }
fldmiad AO1 , { d8 - d15 }
vldmia.f64 XO! , { d4 }
vldmia.f64 AO1 , { d8 - d15 }


vmla.f64 d24 , d4 , d8 vmla.f64 d24 , d4 , d8
pld [ AO2 , #A_PRE ] pld [ AO2 , #A_PRE ]
@@ -158,7 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F8 .macro SAVE_F8


fldmiad YO, { d16 - d23 }
vldmia.f64 YO, { d16 - d23 }


vmla.f64 d16, d0, d24 vmla.f64 d16, d0, d24
vmla.f64 d17, d0, d25 vmla.f64 d17, d0, d25
@@ -169,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmla.f64 d22, d0, d30 vmla.f64 d22, d0, d30
vmla.f64 d23, d0, d31 vmla.f64 d23, d0, d31


fstmiad YO!, { d16 - d23 }
vstmia.f64 YO!, { d16 - d23 }


.endm .endm


@@ -184,8 +184,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X1 .macro KERNEL_F1X1


fldmiad XO! , { d4 }
fldmiad AO1 , { d8 }
vldmia.f64 XO! , { d4 }
vldmia.f64 AO1 , { d8 }
vmla.f64 d24 , d4 , d8 vmla.f64 d24 , d4 , d8
add AO1, AO1, LDA add AO1, AO1, LDA


@@ -193,9 +193,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F1 .macro SAVE_F1


fldmiad YO, { d16 }
vldmia.f64 YO, { d16 }
vmla.f64 d16, d0, d24 vmla.f64 d16, d0, d24
fstmiad YO!, { d16 }
vstmia.f64 YO!, { d16 }


.endm .endm


@@ -234,8 +234,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ AO2 , #A_PRE ] pld [ AO2 , #A_PRE ]
pld [ AO2 , #A_PRE+32 ] pld [ AO2 , #A_PRE+32 ]
fldmiad XO , { d4 }
fldmiad AO1 , { d8 - d15 }
vldmia.f64 XO , { d4 }
vldmia.f64 AO1 , { d8 - d15 }


vmla.f64 d24 , d4 , d8 vmla.f64 d24 , d4 , d8
vmla.f64 d25 , d4 , d9 vmla.f64 d25 , d4 , d9
@@ -253,44 +253,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S8 .macro SAVE_S8


fldmiad YO, { d16 }
vldmia.f64 YO, { d16 }
vmla.f64 d16, d0, d24 vmla.f64 d16, d0, d24
fstmiad YO, { d16 }
vstmia.f64 YO, { d16 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d17 }
vldmia.f64 YO, { d17 }
vmla.f64 d17, d0, d25 vmla.f64 d17, d0, d25
fstmiad YO, { d17 }
vstmia.f64 YO, { d17 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d18 }
vldmia.f64 YO, { d18 }
vmla.f64 d18, d0, d26 vmla.f64 d18, d0, d26
fstmiad YO, { d18 }
vstmia.f64 YO, { d18 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d19 }
vldmia.f64 YO, { d19 }
vmla.f64 d19, d0, d27 vmla.f64 d19, d0, d27
fstmiad YO, { d19 }
vstmia.f64 YO, { d19 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d20 }
vldmia.f64 YO, { d20 }
vmla.f64 d20, d0, d28 vmla.f64 d20, d0, d28
fstmiad YO, { d20 }
vstmia.f64 YO, { d20 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d21 }
vldmia.f64 YO, { d21 }
vmla.f64 d21, d0, d29 vmla.f64 d21, d0, d29
fstmiad YO, { d21 }
vstmia.f64 YO, { d21 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d22 }
vldmia.f64 YO, { d22 }
vmla.f64 d22, d0, d30 vmla.f64 d22, d0, d30
fstmiad YO, { d22 }
vstmia.f64 YO, { d22 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d23 }
vldmia.f64 YO, { d23 }
vmla.f64 d23, d0, d31 vmla.f64 d23, d0, d31
fstmiad YO, { d23 }
vstmia.f64 YO, { d23 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -306,8 +306,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X1 .macro KERNEL_S1X1


fldmiad XO , { d4 }
fldmiad AO1 , { d8 }
vldmia.f64 XO , { d4 }
vldmia.f64 AO1 , { d8 }
vmla.f64 d24 , d4 , d8 vmla.f64 d24 , d4 , d8
add AO1, AO1, LDA add AO1, AO1, LDA
add XO, XO, INC_X add XO, XO, INC_X
@@ -316,9 +316,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S1 .macro SAVE_S1


fldmiad YO, { d16 }
vldmia.f64 YO, { d16 }
vmla.f64 d16, d0, d24 vmla.f64 d16, d0, d24
fstmiad YO, { d16 }
vstmia.f64 YO, { d16 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -361,8 +361,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F8X1 .macro KERNEL_F8X1


pld [ AO2 , #A_PRE ] pld [ AO2 , #A_PRE ]
fldmias XO! , { s4 }
fldmias AO1 , { s8 - s15 }
vldmia.f32 XO! , { s4 }
vldmia.f32 AO1 , { s8 - s15 }


vmla.f32 s24 , s4 , s8 vmla.f32 s24 , s4 , s8
vmla.f32 s25 , s4 , s9 vmla.f32 s25 , s4 , s9
@@ -379,7 +379,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F8 .macro SAVE_F8


fldmias YO, { s16 - s23 }
vldmia.f32 YO, { s16 - s23 }


vmla.f32 s16, s0, s24 vmla.f32 s16, s0, s24
vmla.f32 s17, s0, s25 vmla.f32 s17, s0, s25
@@ -390,7 +390,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmla.f32 s22, s0, s30 vmla.f32 s22, s0, s30
vmla.f32 s23, s0, s31 vmla.f32 s23, s0, s31


fstmias YO!, { s16 - s23 }
vstmia.f32 YO!, { s16 - s23 }


.endm .endm


@@ -405,8 +405,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X1 .macro KERNEL_F1X1


fldmias XO! , { s4 }
fldmias AO1 , { s8 }
vldmia.f32 XO! , { s4 }
vldmia.f32 AO1 , { s8 }
vmla.f32 s24 , s4 , s8 vmla.f32 s24 , s4 , s8
add AO1, AO1, LDA add AO1, AO1, LDA


@@ -414,9 +414,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F1 .macro SAVE_F1


fldmias YO, { s16 }
vldmia.f32 YO, { s16 }
vmla.f32 s16, s0, s24 vmla.f32 s16, s0, s24
fstmias YO!, { s16 }
vstmia.f32 YO!, { s16 }


.endm .endm


@@ -454,8 +454,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S8X1 .macro KERNEL_S8X1


pld [ AO2 , #A_PRE ] pld [ AO2 , #A_PRE ]
fldmias XO , { s4 }
fldmias AO1 , { s8 - s15 }
vldmia.f32 XO , { s4 }
vldmia.f32 AO1 , { s8 - s15 }


vmla.f32 s24 , s4 , s8 vmla.f32 s24 , s4 , s8
vmla.f32 s25 , s4 , s9 vmla.f32 s25 , s4 , s9
@@ -473,44 +473,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S8 .macro SAVE_S8


fldmias YO, { s16 }
vldmia.f32 YO, { s16 }
vmla.f32 s16, s0, s24 vmla.f32 s16, s0, s24
fstmias YO, { s16 }
vstmia.f32 YO, { s16 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s17 }
vldmia.f32 YO, { s17 }
vmla.f32 s17, s0, s25 vmla.f32 s17, s0, s25
fstmias YO, { s17 }
vstmia.f32 YO, { s17 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s18 }
vldmia.f32 YO, { s18 }
vmla.f32 s18, s0, s26 vmla.f32 s18, s0, s26
fstmias YO, { s18 }
vstmia.f32 YO, { s18 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s19 }
vldmia.f32 YO, { s19 }
vmla.f32 s19, s0, s27 vmla.f32 s19, s0, s27
fstmias YO, { s19 }
vstmia.f32 YO, { s19 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s20 }
vldmia.f32 YO, { s20 }
vmla.f32 s20, s0, s28 vmla.f32 s20, s0, s28
fstmias YO, { s20 }
vstmia.f32 YO, { s20 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s21 }
vldmia.f32 YO, { s21 }
vmla.f32 s21, s0, s29 vmla.f32 s21, s0, s29
fstmias YO, { s21 }
vstmia.f32 YO, { s21 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s22 }
vldmia.f32 YO, { s22 }
vmla.f32 s22, s0, s30 vmla.f32 s22, s0, s30
fstmias YO, { s22 }
vstmia.f32 YO, { s22 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s23 }
vldmia.f32 YO, { s23 }
vmla.f32 s23, s0, s31 vmla.f32 s23, s0, s31
fstmias YO, { s23 }
vstmia.f32 YO, { s23 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -526,8 +526,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X1 .macro KERNEL_S1X1


fldmias XO , { s4 }
fldmias AO1 , { s8 }
vldmia.f32 XO , { s4 }
vldmia.f32 AO1 , { s8 }
vmla.f32 s24 , s4 , s8 vmla.f32 s24 , s4 , s8
add AO1, AO1, LDA add AO1, AO1, LDA
add XO, XO, INC_X add XO, XO, INC_X
@@ -536,9 +536,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S1 .macro SAVE_S1


fldmias YO, { s16 }
vldmia.f32 YO, { s16 }
vmla.f32 s16, s0, s24 vmla.f32 s16, s0, s24
fstmias YO, { s16 }
vstmia.f32 YO, { s16 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm


+ 84
- 84
kernel/arm/gemv_t_vfp.S View File

@@ -112,13 +112,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F2X4 .macro KERNEL_F2X4


pld [ XO , #X_PRE ] pld [ XO , #X_PRE ]
fldmiad XO! , { d12 - d15 }
vldmia.f64 XO! , { d12 - d15 }
pld [ AO1 , #A_PRE ] pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
pld [ AO2 , #A_PRE ] pld [ AO2 , #A_PRE ]
fldmiad AO2!, { d4 - d5 }
fldmiad AO1!, { d10 - d11 }
fldmiad AO2!, { d6 - d7 }
vldmia.f64 AO2!, { d4 - d5 }
vldmia.f64 AO1!, { d10 - d11 }
vldmia.f64 AO2!, { d6 - d7 }


vmla.f64 d2 , d12 , d8 vmla.f64 d2 , d12 , d8
vmla.f64 d3 , d12 , d4 vmla.f64 d3 , d12 , d4
@@ -133,9 +133,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F2X1 .macro KERNEL_F2X1


fldmiad XO! , { d1 }
fldmiad AO1!, { d8 }
fldmiad AO2!, { d4 }
vldmia.f64 XO! , { d1 }
vldmia.f64 AO1!, { d8 }
vldmia.f64 AO2!, { d4 }
vmla.f64 d2 , d1 , d8 vmla.f64 d2 , d1 , d8
vmla.f64 d3 , d1 , d4 vmla.f64 d3 , d1 , d4


@@ -143,10 +143,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F2 .macro SAVE_F2


fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }
vmla.f64 d4, d0, d2 vmla.f64 d4, d0, d2
vmla.f64 d5, d0, d3 vmla.f64 d5, d0, d3
fstmiad YO!, { d4 - d5 }
vstmia.f64 YO!, { d4 - d5 }


.endm .endm


@@ -160,10 +160,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F1X4 .macro KERNEL_F1X4


pld [ XO , #X_PRE ] pld [ XO , #X_PRE ]
fldmiad XO! , { d12 - d15 }
vldmia.f64 XO! , { d12 - d15 }
pld [ AO1 , #A_PRE ] pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d10 - d11 }
vmla.f64 d2 , d12 , d8 vmla.f64 d2 , d12 , d8
vmla.f64 d2 , d13 , d9 vmla.f64 d2 , d13 , d9
vmla.f64 d2 , d14, d10 vmla.f64 d2 , d14, d10
@@ -173,17 +173,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X1 .macro KERNEL_F1X1


fldmiad XO! , { d1 }
fldmiad AO1!, { d8 }
vldmia.f64 XO! , { d1 }
vldmia.f64 AO1!, { d8 }
vmla.f64 d2 , d1 , d8 vmla.f64 d2 , d1 , d8


.endm .endm


.macro SAVE_F1 .macro SAVE_F1


fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4, d0, d2 vmla.f64 d4, d0, d2
fstmiad YO!, { d4 }
vstmia.f64 YO!, { d4 }


.endm .endm


@@ -197,23 +197,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S2X4 .macro KERNEL_S2X4


fldmiad XO , { d12 }
vldmia.f64 XO , { d12 }
add XO, XO, INC_X add XO, XO, INC_X


pld [ AO1 , #A_PRE ] pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
pld [ AO2 , #A_PRE ] pld [ AO2 , #A_PRE ]
fldmiad AO2!, { d4 - d5 }
vldmia.f64 AO2!, { d4 - d5 }


fldmiad XO , { d13 }
vldmia.f64 XO , { d13 }
add XO, XO, INC_X add XO, XO, INC_X
fldmiad AO1!, { d10 - d11 }
fldmiad AO2!, { d6 - d7 }
vldmia.f64 AO1!, { d10 - d11 }
vldmia.f64 AO2!, { d6 - d7 }


fldmiad XO , { d14 }
vldmia.f64 XO , { d14 }
add XO, XO, INC_X add XO, XO, INC_X


fldmiad XO , { d15 }
vldmia.f64 XO , { d15 }
add XO, XO, INC_X add XO, XO, INC_X


vmla.f64 d2 , d12 , d8 vmla.f64 d2 , d12 , d8
@@ -229,9 +229,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S2X1 .macro KERNEL_S2X1


fldmiad XO , { d1 }
fldmiad AO1!, { d8 }
fldmiad AO2!, { d4 }
vldmia.f64 XO , { d1 }
vldmia.f64 AO1!, { d8 }
vldmia.f64 AO2!, { d4 }
vmla.f64 d2 , d1 , d8 vmla.f64 d2 , d1 , d8
add XO, XO, INC_X add XO, XO, INC_X
vmla.f64 d3 , d1 , d4 vmla.f64 d3 , d1 , d4
@@ -240,14 +240,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S2 .macro SAVE_S2


fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4, d0, d2 vmla.f64 d4, d0, d2
fstmiad YO, { d4 }
vstmia.f64 YO, { d4 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d5 }
vldmia.f64 YO, { d5 }
vmla.f64 d5, d0, d3 vmla.f64 d5, d0, d3
fstmiad YO, { d5 }
vstmia.f64 YO, { d5 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -261,20 +261,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X4 .macro KERNEL_S1X4


fldmiad XO , { d12 }
vldmia.f64 XO , { d12 }
add XO, XO, INC_X add XO, XO, INC_X


pld [ AO1 , #A_PRE ] pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }


fldmiad XO , { d13 }
vldmia.f64 XO , { d13 }
add XO, XO, INC_X add XO, XO, INC_X
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d10 - d11 }


fldmiad XO , { d14 }
vldmia.f64 XO , { d14 }
add XO, XO, INC_X add XO, XO, INC_X


fldmiad XO , { d15 }
vldmia.f64 XO , { d15 }
add XO, XO, INC_X add XO, XO, INC_X


vmla.f64 d2 , d12 , d8 vmla.f64 d2 , d12 , d8
@@ -286,8 +286,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X1 .macro KERNEL_S1X1


fldmiad XO , { d1 }
fldmiad AO1!, { d8 }
vldmia.f64 XO , { d1 }
vldmia.f64 AO1!, { d8 }
vmla.f64 d2 , d1 , d8 vmla.f64 d2 , d1 , d8
add XO, XO, INC_X add XO, XO, INC_X


@@ -295,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S1 .macro SAVE_S1


fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4, d0, d2 vmla.f64 d4, d0, d2
fstmiad YO, { d4 }
vstmia.f64 YO, { d4 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -315,11 +315,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F2X4 .macro KERNEL_F2X4


fldmias XO! , { s12 - s15 }
fldmias AO1!, { s8 - s9 }
fldmias AO2!, { s4 - s5 }
fldmias AO1!, { s10 - s11 }
fldmias AO2!, { s6 - s7 }
vldmia.f32 XO! , { s12 - s15 }
vldmia.f32 AO1!, { s8 - s9 }
vldmia.f32 AO2!, { s4 - s5 }
vldmia.f32 AO1!, { s10 - s11 }
vldmia.f32 AO2!, { s6 - s7 }


vmla.f32 s2 , s12 , s8 vmla.f32 s2 , s12 , s8
vmla.f32 s3 , s12 , s4 vmla.f32 s3 , s12 , s4
@@ -334,9 +334,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F2X1 .macro KERNEL_F2X1


fldmias XO! , { s1 }
fldmias AO1!, { s8 }
fldmias AO2!, { s4 }
vldmia.f32 XO! , { s1 }
vldmia.f32 AO1!, { s8 }
vldmia.f32 AO2!, { s4 }
vmla.f32 s2 , s1 , s8 vmla.f32 s2 , s1 , s8
vmla.f32 s3 , s1 , s4 vmla.f32 s3 , s1 , s4


@@ -344,10 +344,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F2 .macro SAVE_F2


fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }
vmla.f32 s4, s0, s2 vmla.f32 s4, s0, s2
vmla.f32 s5, s0, s3 vmla.f32 s5, s0, s3
fstmias YO!, { s4 - s5 }
vstmia.f32 YO!, { s4 - s5 }


.endm .endm


@@ -359,9 +359,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X4 .macro KERNEL_F1X4


fldmias XO! , { s12 - s15 }
fldmias AO1!, { s8 - s9 }
fldmias AO1!, { s10 - s11 }
vldmia.f32 XO! , { s12 - s15 }
vldmia.f32 AO1!, { s8 - s9 }
vldmia.f32 AO1!, { s10 - s11 }
vmla.f32 s2 , s12 , s8 vmla.f32 s2 , s12 , s8
vmla.f32 s2 , s13 , s9 vmla.f32 s2 , s13 , s9
vmla.f32 s2 , s14, s10 vmla.f32 s2 , s14, s10
@@ -371,17 +371,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X1 .macro KERNEL_F1X1


fldmias XO! , { s1 }
fldmias AO1!, { s8 }
vldmia.f32 XO! , { s1 }
vldmia.f32 AO1!, { s8 }
vmla.f32 s2 , s1 , s8 vmla.f32 s2 , s1 , s8


.endm .endm


.macro SAVE_F1 .macro SAVE_F1


fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4, s0, s2 vmla.f32 s4, s0, s2
fstmias YO!, { s4 }
vstmia.f32 YO!, { s4 }


.endm .endm


@@ -395,21 +395,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S2X4 .macro KERNEL_S2X4


fldmias XO , { s12 }
vldmia.f32 XO , { s12 }
add XO, XO, INC_X add XO, XO, INC_X


fldmias AO1!, { s8 - s9 }
fldmias AO2!, { s4 - s5 }
vldmia.f32 AO1!, { s8 - s9 }
vldmia.f32 AO2!, { s4 - s5 }


fldmias XO , { s13 }
vldmia.f32 XO , { s13 }
add XO, XO, INC_X add XO, XO, INC_X
fldmias AO1!, { s10 - s11 }
fldmias AO2!, { s6 - s7 }
vldmia.f32 AO1!, { s10 - s11 }
vldmia.f32 AO2!, { s6 - s7 }


fldmias XO , { s14 }
vldmia.f32 XO , { s14 }
add XO, XO, INC_X add XO, XO, INC_X


fldmias XO , { s15 }
vldmia.f32 XO , { s15 }
add XO, XO, INC_X add XO, XO, INC_X


vmla.f32 s2 , s12 , s8 vmla.f32 s2 , s12 , s8
@@ -425,9 +425,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S2X1 .macro KERNEL_S2X1


fldmias XO , { s1 }
fldmias AO1!, { s8 }
fldmias AO2!, { s4 }
vldmia.f32 XO , { s1 }
vldmia.f32 AO1!, { s8 }
vldmia.f32 AO2!, { s4 }
vmla.f32 s2 , s1 , s8 vmla.f32 s2 , s1 , s8
add XO, XO, INC_X add XO, XO, INC_X
vmla.f32 s3 , s1 , s4 vmla.f32 s3 , s1 , s4
@@ -436,14 +436,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S2 .macro SAVE_S2


fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4, s0, s2 vmla.f32 s4, s0, s2
fstmias YO, { s4 }
vstmia.f32 YO, { s4 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s5 }
vldmia.f32 YO, { s5 }
vmla.f32 s5, s0, s3 vmla.f32 s5, s0, s3
fstmias YO, { s5 }
vstmia.f32 YO, { s5 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -456,20 +456,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X4 .macro KERNEL_S1X4


fldmias XO , { s12 }
vldmia.f32 XO , { s12 }
add XO, XO, INC_X add XO, XO, INC_X


pld [ AO1 , #A_PRE ] pld [ AO1 , #A_PRE ]
fldmias AO1!, { s8 - s9 }
vldmia.f32 AO1!, { s8 - s9 }


fldmias XO , { s13 }
vldmia.f32 XO , { s13 }
add XO, XO, INC_X add XO, XO, INC_X
fldmias AO1!, { s10 - s11 }
vldmia.f32 AO1!, { s10 - s11 }


fldmias XO , { s14 }
vldmia.f32 XO , { s14 }
add XO, XO, INC_X add XO, XO, INC_X


fldmias XO , { s15 }
vldmia.f32 XO , { s15 }
add XO, XO, INC_X add XO, XO, INC_X


vmla.f32 s2 , s12 , s8 vmla.f32 s2 , s12 , s8
@@ -481,8 +481,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X1 .macro KERNEL_S1X1


fldmias XO , { s1 }
fldmias AO1!, { s8 }
vldmia.f32 XO , { s1 }
vldmia.f32 AO1!, { s8 }
vmla.f32 s2 , s1 , s8 vmla.f32 s2 , s1 , s8
add XO, XO, INC_X add XO, XO, INC_X


@@ -490,9 +490,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S1 .macro SAVE_S1


fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4, s0, s2 vmla.f32 s4, s0, s2
fstmias YO, { s4 }
vstmia.f32 YO, { s4 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm


+ 84
- 84
kernel/arm/gemv_t_vfpv3.S View File

@@ -108,17 +108,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F2X4 .macro KERNEL_F2X4


pld [ XO , #X_PRE ] pld [ XO , #X_PRE ]
fldmiad XO! , { d28 - d31 }
vldmia.f64 XO! , { d28 - d31 }
pld [ AO1 , #A_PRE ] pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
pld [ AO2 , #A_PRE ] pld [ AO2 , #A_PRE ]
fldmiad AO2!, { d16 - d17 }
vldmia.f64 AO2!, { d16 - d17 }
vmla.f64 d4 , d28 , d8 vmla.f64 d4 , d28 , d8
vmla.f64 d5 , d28 , d16 vmla.f64 d5 , d28 , d16
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d10 - d11 }
vmla.f64 d4 , d29 , d9 vmla.f64 d4 , d29 , d9
vmla.f64 d5 , d29 , d17 vmla.f64 d5 , d29 , d17
fldmiad AO2!, { d18 - d19 }
vldmia.f64 AO2!, { d18 - d19 }
vmla.f64 d4 , d30, d10 vmla.f64 d4 , d30, d10
vmla.f64 d5 , d30, d18 vmla.f64 d5 , d30, d18
vmla.f64 d4 , d31, d11 vmla.f64 d4 , d31, d11
@@ -129,9 +129,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F2X1 .macro KERNEL_F2X1


fldmiad XO! , { d2 }
fldmiad AO1!, { d8 }
fldmiad AO2!, { d16 }
vldmia.f64 XO! , { d2 }
vldmia.f64 AO1!, { d8 }
vldmia.f64 AO2!, { d16 }
vmla.f64 d4 , d2 , d8 vmla.f64 d4 , d2 , d8
vmla.f64 d5 , d2 , d16 vmla.f64 d5 , d2 , d16


@@ -139,10 +139,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F2 .macro SAVE_F2


fldmiad YO, { d24 - d25 }
vldmia.f64 YO, { d24 - d25 }
vmla.f64 d24, d0, d4 vmla.f64 d24, d0, d4
vmla.f64 d25, d0, d5 vmla.f64 d25, d0, d5
fstmiad YO!, { d24 - d25 }
vstmia.f64 YO!, { d24 - d25 }


.endm .endm


@@ -156,23 +156,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S2X4 .macro KERNEL_S2X4


pld [ AO1 , #A_PRE ] pld [ AO1 , #A_PRE ]
fldmiad XO , { d28 }
vldmia.f64 XO , { d28 }
add XO, XO, INC_X add XO, XO, INC_X
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
pld [ AO2 , #A_PRE ] pld [ AO2 , #A_PRE ]
fldmiad AO2!, { d16 - d17 }
vldmia.f64 AO2!, { d16 - d17 }
vmla.f64 d4 , d28 , d8 vmla.f64 d4 , d28 , d8
fldmiad XO , { d29 }
vldmia.f64 XO , { d29 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f64 d5 , d28 , d16 vmla.f64 d5 , d28 , d16
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d10 - d11 }
vmla.f64 d4 , d29 , d9 vmla.f64 d4 , d29 , d9
fldmiad XO , { d30 }
vldmia.f64 XO , { d30 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f64 d5 , d29 , d17 vmla.f64 d5 , d29 , d17
fldmiad AO2!, { d18 - d19 }
vldmia.f64 AO2!, { d18 - d19 }
vmla.f64 d4 , d30, d10 vmla.f64 d4 , d30, d10
fldmiad XO , { d31 }
vldmia.f64 XO , { d31 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f64 d5 , d30, d18 vmla.f64 d5 , d30, d18
vmla.f64 d4 , d31, d11 vmla.f64 d4 , d31, d11
@@ -183,10 +183,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S2X1 .macro KERNEL_S2X1


fldmiad XO , { d2 }
fldmiad AO1!, { d8 }
vldmia.f64 XO , { d2 }
vldmia.f64 AO1!, { d8 }
add XO, XO, INC_X add XO, XO, INC_X
fldmiad AO2!, { d16 }
vldmia.f64 AO2!, { d16 }
vmla.f64 d4 , d2 , d8 vmla.f64 d4 , d2 , d8
vmla.f64 d5 , d2 , d16 vmla.f64 d5 , d2 , d16


@@ -194,14 +194,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S2 .macro SAVE_S2


fldmiad YO, { d24 }
vldmia.f64 YO, { d24 }
vmla.f64 d24, d0, d4 vmla.f64 d24, d0, d4
fstmiad YO, { d24 }
vstmia.f64 YO, { d24 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d24 }
vldmia.f64 YO, { d24 }
vmla.f64 d24, d0, d5 vmla.f64 d24, d0, d5
fstmiad YO, { d24 }
vstmia.f64 YO, { d24 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -215,11 +215,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F1X4 .macro KERNEL_F1X4


pld [ XO , #X_PRE ] pld [ XO , #X_PRE ]
fldmiad XO! , { d28 - d31 }
vldmia.f64 XO! , { d28 - d31 }
pld [ AO1 , #A_PRE ] pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
vmla.f64 d4 , d28 , d8 vmla.f64 d4 , d28 , d8
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d10 - d11 }
vmla.f64 d4 , d29 , d9 vmla.f64 d4 , d29 , d9
vmla.f64 d4 , d30, d10 vmla.f64 d4 , d30, d10
vmla.f64 d4 , d31, d11 vmla.f64 d4 , d31, d11
@@ -229,17 +229,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X1 .macro KERNEL_F1X1


fldmiad XO! , { d2 }
fldmiad AO1!, { d8 }
vldmia.f64 XO! , { d2 }
vldmia.f64 AO1!, { d8 }
vmla.f64 d4 , d2 , d8 vmla.f64 d4 , d2 , d8


.endm .endm


.macro SAVE_F1 .macro SAVE_F1


fldmiad YO, { d24 }
vldmia.f64 YO, { d24 }
vmla.f64 d24, d0, d4 vmla.f64 d24, d0, d4
fstmiad YO!, { d24 }
vstmia.f64 YO!, { d24 }


.endm .endm


@@ -252,18 +252,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S1X4 .macro KERNEL_S1X4


pld [ AO1 , #A_PRE ] pld [ AO1 , #A_PRE ]
fldmiad XO , { d28 }
vldmia.f64 XO , { d28 }
add XO, XO, INC_X add XO, XO, INC_X
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
vmla.f64 d4 , d28 , d8 vmla.f64 d4 , d28 , d8
fldmiad XO , { d29 }
vldmia.f64 XO , { d29 }
add XO, XO, INC_X add XO, XO, INC_X
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d10 - d11 }
vmla.f64 d4 , d29 , d9 vmla.f64 d4 , d29 , d9
fldmiad XO , { d30 }
vldmia.f64 XO , { d30 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f64 d4 , d30, d10 vmla.f64 d4 , d30, d10
fldmiad XO , { d31 }
vldmia.f64 XO , { d31 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f64 d4 , d31, d11 vmla.f64 d4 , d31, d11


@@ -272,8 +272,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X1 .macro KERNEL_S1X1


fldmiad XO , { d2 }
fldmiad AO1!, { d8 }
vldmia.f64 XO , { d2 }
vldmia.f64 AO1!, { d8 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f64 d4 , d2 , d8 vmla.f64 d4 , d2 , d8


@@ -281,9 +281,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S1 .macro SAVE_S1


fldmiad YO, { d24 }
vldmia.f64 YO, { d24 }
vmla.f64 d24, d0, d4 vmla.f64 d24, d0, d4
fstmiad YO, { d24 }
vstmia.f64 YO, { d24 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -300,15 +300,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F2X4 .macro KERNEL_F2X4


fldmias XO! , { s28 - s31 }
fldmias AO1!, { s8 - s9 }
fldmias AO2!, { s16 - s17 }
vldmia.f32 XO! , { s28 - s31 }
vldmia.f32 AO1!, { s8 - s9 }
vldmia.f32 AO2!, { s16 - s17 }
vmla.f32 s4 , s28 , s8 vmla.f32 s4 , s28 , s8
vmla.f32 s5 , s28 , s16 vmla.f32 s5 , s28 , s16
fldmias AO1!, { s10 - s11 }
vldmia.f32 AO1!, { s10 - s11 }
vmla.f32 s4 , s29 , s9 vmla.f32 s4 , s29 , s9
vmla.f32 s5 , s29 , s17 vmla.f32 s5 , s29 , s17
fldmias AO2!, { s18 - s19 }
vldmia.f32 AO2!, { s18 - s19 }
vmla.f32 s4 , s30, s10 vmla.f32 s4 , s30, s10
vmla.f32 s5 , s30, s18 vmla.f32 s5 , s30, s18
vmla.f32 s4 , s31, s11 vmla.f32 s4 , s31, s11
@@ -319,9 +319,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F2X1 .macro KERNEL_F2X1


fldmias XO! , { s2 }
fldmias AO1!, { s8 }
fldmias AO2!, { s16 }
vldmia.f32 XO! , { s2 }
vldmia.f32 AO1!, { s8 }
vldmia.f32 AO2!, { s16 }
vmla.f32 s4 , s2 , s8 vmla.f32 s4 , s2 , s8
vmla.f32 s5 , s2 , s16 vmla.f32 s5 , s2 , s16


@@ -329,10 +329,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F2 .macro SAVE_F2


fldmias YO, { s24 - s25 }
vldmia.f32 YO, { s24 - s25 }
vmla.f32 s24, s0, s4 vmla.f32 s24, s0, s4
vmla.f32 s25, s0, s5 vmla.f32 s25, s0, s5
fstmias YO!, { s24 - s25 }
vstmia.f32 YO!, { s24 - s25 }


.endm .endm


@@ -345,22 +345,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S2X4 .macro KERNEL_S2X4


fldmias XO , { s28 }
vldmia.f32 XO , { s28 }
add XO, XO, INC_X add XO, XO, INC_X
fldmias AO1!, { s8 - s9 }
fldmias AO2!, { s16 - s17 }
vldmia.f32 AO1!, { s8 - s9 }
vldmia.f32 AO2!, { s16 - s17 }
vmla.f32 s4 , s28 , s8 vmla.f32 s4 , s28 , s8
fldmias XO , { s29 }
vldmia.f32 XO , { s29 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f32 s5 , s28 , s16 vmla.f32 s5 , s28 , s16
fldmias AO1!, { s10 - s11 }
vldmia.f32 AO1!, { s10 - s11 }
vmla.f32 s4 , s29 , s9 vmla.f32 s4 , s29 , s9
fldmias XO , { s30 }
vldmia.f32 XO , { s30 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f32 s5 , s29 , s17 vmla.f32 s5 , s29 , s17
fldmias AO2!, { s18 - s19 }
vldmia.f32 AO2!, { s18 - s19 }
vmla.f32 s4 , s30, s10 vmla.f32 s4 , s30, s10
fldmias XO , { s31 }
vldmia.f32 XO , { s31 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f32 s5 , s30, s18 vmla.f32 s5 , s30, s18
vmla.f32 s4 , s31, s11 vmla.f32 s4 , s31, s11
@@ -371,10 +371,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S2X1 .macro KERNEL_S2X1


fldmias XO , { s2 }
fldmias AO1!, { s8 }
vldmia.f32 XO , { s2 }
vldmia.f32 AO1!, { s8 }
add XO, XO, INC_X add XO, XO, INC_X
fldmias AO2!, { s16 }
vldmia.f32 AO2!, { s16 }
vmla.f32 s4 , s2 , s8 vmla.f32 s4 , s2 , s8
vmla.f32 s5 , s2 , s16 vmla.f32 s5 , s2 , s16


@@ -382,14 +382,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S2 .macro SAVE_S2


fldmias YO, { s24 }
vldmia.f32 YO, { s24 }
vmla.f32 s24, s0, s4 vmla.f32 s24, s0, s4
fstmias YO, { s24 }
vstmia.f32 YO, { s24 }
add YO, YO, INC_Y add YO, YO, INC_Y


fldmias YO, { s24 }
vldmia.f32 YO, { s24 }
vmla.f32 s24, s0, s5 vmla.f32 s24, s0, s5
fstmias YO, { s24 }
vstmia.f32 YO, { s24 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm
@@ -402,10 +402,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X4 .macro KERNEL_F1X4


fldmias XO! , { s28 - s31 }
fldmias AO1!, { s8 - s9 }
vldmia.f32 XO! , { s28 - s31 }
vldmia.f32 AO1!, { s8 - s9 }
vmla.f32 s4 , s28 , s8 vmla.f32 s4 , s28 , s8
fldmias AO1!, { s10 - s11 }
vldmia.f32 AO1!, { s10 - s11 }
vmla.f32 s4 , s29 , s9 vmla.f32 s4 , s29 , s9
vmla.f32 s4 , s30, s10 vmla.f32 s4 , s30, s10
vmla.f32 s4 , s31, s11 vmla.f32 s4 , s31, s11
@@ -415,17 +415,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X1 .macro KERNEL_F1X1


fldmias XO! , { s2 }
fldmias AO1!, { s8 }
vldmia.f32 XO! , { s2 }
vldmia.f32 AO1!, { s8 }
vmla.f32 s4 , s2 , s8 vmla.f32 s4 , s2 , s8


.endm .endm


.macro SAVE_F1 .macro SAVE_F1


fldmias YO, { s24 }
vldmia.f32 YO, { s24 }
vmla.f32 s24, s0, s4 vmla.f32 s24, s0, s4
fstmias YO!, { s24 }
vstmia.f32 YO!, { s24 }


.endm .endm


@@ -437,18 +437,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X4 .macro KERNEL_S1X4


fldmias XO , { s28 }
vldmia.f32 XO , { s28 }
add XO, XO, INC_X add XO, XO, INC_X
fldmias AO1!, { s8 - s9 }
vldmia.f32 AO1!, { s8 - s9 }
vmla.f32 s4 , s28 , s8 vmla.f32 s4 , s28 , s8
fldmias XO , { s29 }
vldmia.f32 XO , { s29 }
add XO, XO, INC_X add XO, XO, INC_X
fldmias AO1!, { s10 - s11 }
vldmia.f32 AO1!, { s10 - s11 }
vmla.f32 s4 , s29 , s9 vmla.f32 s4 , s29 , s9
fldmias XO , { s30 }
vldmia.f32 XO , { s30 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f32 s4 , s30, s10 vmla.f32 s4 , s30, s10
fldmias XO , { s31 }
vldmia.f32 XO , { s31 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f32 s4 , s31, s11 vmla.f32 s4 , s31, s11


@@ -457,8 +457,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X1 .macro KERNEL_S1X1


fldmias XO , { s2 }
fldmias AO1!, { s8 }
vldmia.f32 XO , { s2 }
vldmia.f32 AO1!, { s8 }
add XO, XO, INC_X add XO, XO, INC_X
vmla.f32 s4 , s2 , s8 vmla.f32 s4 , s2 , s8


@@ -466,9 +466,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S1 .macro SAVE_S1


fldmias YO, { s24 }
vldmia.f32 YO, { s24 }
vmla.f32 s24, s0, s4 vmla.f32 s24, s0, s4
fstmias YO, { s24 }
vstmia.f32 YO, { s24 }
add YO, YO, INC_Y add YO, YO, INC_Y


.endm .endm


+ 16
- 16
kernel/arm/iamax_vfp.S View File

@@ -114,7 +114,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F .macro INIT_F


fldmiad X!, { d0 }
vldmia.f64 X!, { d0 }
VABS( d0, d0 ) VABS( d0, d0 )
mov Z, #1 mov Z, #1
mov INDEX, Z mov INDEX, Z
@@ -123,7 +123,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
add Z, Z, #1 add Z, Z, #1
VABS( d4, d4 ) VABS( d4, d4 )
vcmpe.f64 d4, d0 vcmpe.f64 d4, d0
@@ -135,7 +135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S .macro INIT_S


fldmiad X, { d0 }
vldmia.f64 X, { d0 }
VABS( d0, d0 ) VABS( d0, d0 )
mov Z, #1 mov Z, #1
mov INDEX, Z mov INDEX, Z
@@ -146,7 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 }
vldmia.f64 X, { d4 }
add Z, Z, #1 add Z, Z, #1
VABS( d4, d4 ) VABS( d4, d4 )
vcmpe.f64 d4, d0 vcmpe.f64 d4, d0
@@ -161,7 +161,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F .macro INIT_F


fldmias X!, { s0 }
vldmia.f32 X!, { s0 }
VABS( s0, s0 ) VABS( s0, s0 )
mov Z, #1 mov Z, #1
mov INDEX, Z mov INDEX, Z
@@ -170,7 +170,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
add Z, Z, #1 add Z, Z, #1
VABS( s4, s4 ) VABS( s4, s4 )
vcmpe.f32 s4, s0 vcmpe.f32 s4, s0
@@ -182,7 +182,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S .macro INIT_S


fldmias X, { s0 }
vldmia.f32 X, { s0 }
VABS( s0, s0 ) VABS( s0, s0 )
mov Z, #1 mov Z, #1
mov INDEX, Z mov INDEX, Z
@@ -193,7 +193,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 }
vldmia.f32 X, { s4 }
add Z, Z, #1 add Z, Z, #1
VABS( s4, s4 ) VABS( s4, s4 )
vcmpe.f32 s4, s0 vcmpe.f32 s4, s0
@@ -215,7 +215,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F .macro INIT_F


fldmiad X!, { d0 -d1 }
vldmia.f64 X!, { d0 -d1 }
vabs.f64 d0, d0 vabs.f64 d0, d0
vabs.f64 d1, d1 vabs.f64 d1, d1
vadd.f64 d0 , d0, d1 vadd.f64 d0 , d0, d1
@@ -227,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }
add Z, Z, #1 add Z, Z, #1
vabs.f64 d4, d4 vabs.f64 d4, d4
vabs.f64 d5, d5 vabs.f64 d5, d5
@@ -241,7 +241,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S .macro INIT_S


fldmiad X, { d0 -d1 }
vldmia.f64 X, { d0 -d1 }
vabs.f64 d0, d0 vabs.f64 d0, d0
vabs.f64 d1, d1 vabs.f64 d1, d1
vadd.f64 d0 , d0, d1 vadd.f64 d0 , d0, d1
@@ -255,7 +255,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
add Z, Z, #1 add Z, Z, #1
vabs.f64 d4, d4 vabs.f64 d4, d4
vabs.f64 d5, d5 vabs.f64 d5, d5
@@ -272,7 +272,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F .macro INIT_F


fldmias X!, { s0 -s1 }
vldmia.f32 X!, { s0 -s1 }
vabs.f32 s0, s0 vabs.f32 s0, s0
vabs.f32 s1, s1 vabs.f32 s1, s1
vadd.f32 s0 , s0, s1 vadd.f32 s0 , s0, s1
@@ -284,7 +284,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }
add Z, Z, #1 add Z, Z, #1
vabs.f32 s4, s4 vabs.f32 s4, s4
vabs.f32 s5, s5 vabs.f32 s5, s5
@@ -298,7 +298,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S .macro INIT_S


fldmias X, { s0 -s1 }
vldmia.f32 X, { s0 -s1 }
vabs.f32 s0, s0 vabs.f32 s0, s0
vabs.f32 s1, s1 vabs.f32 s1, s1
vadd.f32 s0 , s0, s1 vadd.f32 s0 , s0, s1
@@ -312,7 +312,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
add Z, Z, #1 add Z, Z, #1
vabs.f32 s4, s4 vabs.f32 s4, s4
vabs.f32 s5, s5 vabs.f32 s5, s5


+ 8
- 8
kernel/arm/nrm2_vfp.S View File

@@ -58,7 +58,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
vcmpe.f64 d4, d6 // compare with 0.0 vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
beq KERNEL_F1_NEXT_\@ beq KERNEL_F1_NEXT_\@
@@ -95,7 +95,7 @@ KERNEL_F1_NEXT_\@:


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vcmpe.f64 d4, d6 // compare with 0.0 vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
beq KERNEL_S1_NEXT beq KERNEL_S1_NEXT
@@ -121,7 +121,7 @@ KERNEL_S1_NEXT:


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
vcmpe.f32 s4, s6 // compare with 0.0 vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
beq KERNEL_F1_NEXT_\@ beq KERNEL_F1_NEXT_\@
@@ -158,7 +158,7 @@ KERNEL_F1_NEXT_\@:


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 }
vldmia.f32 X, { s4 }
vcmpe.f32 s4, s6 // compare with 0.0 vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
beq KERNEL_S1_NEXT beq KERNEL_S1_NEXT
@@ -191,7 +191,7 @@ KERNEL_S1_NEXT:


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }


vcmpe.f64 d4, d6 // compare with 0.0 vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
@@ -249,7 +249,7 @@ KERNEL_F1_END_\@:


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }


vcmpe.f64 d4, d6 // compare with 0.0 vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
@@ -294,7 +294,7 @@ KERNEL_S1_END_\@:


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }


vcmpe.f32 s4, s6 // compare with 0.0 vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
@@ -350,7 +350,7 @@ KERNEL_F1_END_\@:


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }


vcmpe.f32 s4, s6 // compare with 0.0 vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr


+ 8
- 8
kernel/arm/nrm2_vfpv3.S View File

@@ -58,7 +58,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
vcmpe.f64 d4, d6 // compare with 0.0 vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
beq KERNEL_F1_NEXT_\@ beq KERNEL_F1_NEXT_\@
@@ -95,7 +95,7 @@ KERNEL_F1_NEXT_\@:


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vcmpe.f64 d4, d6 // compare with 0.0 vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
beq KERNEL_S1_NEXT beq KERNEL_S1_NEXT
@@ -121,7 +121,7 @@ KERNEL_S1_NEXT:


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
vcmpe.f32 s4, s6 // compare with 0.0 vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
beq KERNEL_F1_NEXT_\@ beq KERNEL_F1_NEXT_\@
@@ -158,7 +158,7 @@ KERNEL_F1_NEXT_\@:


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 }
vldmia.f32 X, { s4 }
vcmpe.f32 s4, s6 // compare with 0.0 vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
beq KERNEL_S1_NEXT beq KERNEL_S1_NEXT
@@ -191,7 +191,7 @@ KERNEL_S1_NEXT:


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }


vcmpe.f64 d4, d6 // compare with 0.0 vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
@@ -249,7 +249,7 @@ KERNEL_F1_END_\@:


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }


vcmpe.f64 d4, d6 // compare with 0.0 vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
@@ -294,7 +294,7 @@ KERNEL_S1_END_\@:


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }


vcmpe.f32 s4, s6 // compare with 0.0 vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr
@@ -350,7 +350,7 @@ KERNEL_F1_END_\@:


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }


vcmpe.f32 s4, s6 // compare with 0.0 vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr vmrs APSR_nzcv, fpscr


+ 112
- 112
kernel/arm/rot_vfp.S View File

@@ -77,68 +77,68 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]


fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5 fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5 vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }


fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5 fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5 vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }


fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5 fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5 vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }


fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5 fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5 vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }


.endm .endm




.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5 fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5 vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5 fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5 vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X, { d2 }
fstmiad Y, { d3 }
vstmia.f64 X, { d2 }
vstmia.f64 Y, { d3 }


add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
@@ -149,68 +149,68 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F4 .macro KERNEL_F4


fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5 fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5 vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }


fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5 fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5 vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }


fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5 fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5 vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }


fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5 fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5 vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }


.endm .endm




.macro KERNEL_F1 .macro KERNEL_F1


fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5 fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5 vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5 fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5 vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X, { s2 }
fstmias Y, { s3 }
vstmia.f32 X, { s2 }
vstmia.f32 Y, { s3 }


add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
@@ -230,96 +230,96 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]


fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6 fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6 vmul.f64 d3 , d0, d6
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }
vmul.f64 d2 , d0, d5 vmul.f64 d2 , d0, d5
fmacd d2 , d1, d7 fmacd d2 , d1, d7
vmul.f64 d3 , d0, d7 vmul.f64 d3 , d0, d7
vmls.f64 d3 , d1, d5 vmls.f64 d3 , d1, d5
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }


fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6 fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6 vmul.f64 d3 , d0, d6
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }
vmul.f64 d2 , d0, d5 vmul.f64 d2 , d0, d5
fmacd d2 , d1, d7 fmacd d2 , d1, d7
vmul.f64 d3 , d0, d7 vmul.f64 d3 , d0, d7
vmls.f64 d3 , d1, d5 vmls.f64 d3 , d1, d5
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]


fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6 fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6 vmul.f64 d3 , d0, d6
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }
vmul.f64 d2 , d0, d5 vmul.f64 d2 , d0, d5
fmacd d2 , d1, d7 fmacd d2 , d1, d7
vmul.f64 d3 , d0, d7 vmul.f64 d3 , d0, d7
vmls.f64 d3 , d1, d5 vmls.f64 d3 , d1, d5
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }


fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6 fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6 vmul.f64 d3 , d0, d6
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }
vmul.f64 d2 , d0, d5 vmul.f64 d2 , d0, d5
fmacd d2 , d1, d7 fmacd d2 , d1, d7
vmul.f64 d3 , d0, d7 vmul.f64 d3 , d0, d7
vmls.f64 d3 , d1, d5 vmls.f64 d3 , d1, d5
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }


.endm .endm




.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6 fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6 vmul.f64 d3 , d0, d6
vmls.f64 d3 , d1, d4 vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }
vmul.f64 d2 , d0, d5 vmul.f64 d2 , d0, d5
fmacd d2 , d1, d7 fmacd d2 , d1, d7
vmul.f64 d3 , d0, d7 vmul.f64 d3 , d0, d7
vmls.f64 d3 , d1, d5 vmls.f64 d3 , d1, d5
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }




.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4 vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6 fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6 vmul.f64 d3 , d0, d6
@@ -347,96 +347,96 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]


fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6 fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6 vmul.f32 s3 , s0, s6
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }
vmul.f32 s2 , s0, s5 vmul.f32 s2 , s0, s5
fmacs s2 , s1, s7 fmacs s2 , s1, s7
vmul.f32 s3 , s0, s7 vmul.f32 s3 , s0, s7
vmls.f32 s3 , s1, s5 vmls.f32 s3 , s1, s5
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }


fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6 fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6 vmul.f32 s3 , s0, s6
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }
vmul.f32 s2 , s0, s5 vmul.f32 s2 , s0, s5
fmacs s2 , s1, s7 fmacs s2 , s1, s7
vmul.f32 s3 , s0, s7 vmul.f32 s3 , s0, s7
vmls.f32 s3 , s1, s5 vmls.f32 s3 , s1, s5
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]


fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6 fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6 vmul.f32 s3 , s0, s6
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }
vmul.f32 s2 , s0, s5 vmul.f32 s2 , s0, s5
fmacs s2 , s1, s7 fmacs s2 , s1, s7
vmul.f32 s3 , s0, s7 vmul.f32 s3 , s0, s7
vmls.f32 s3 , s1, s5 vmls.f32 s3 , s1, s5
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }


fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6 fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6 vmul.f32 s3 , s0, s6
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }
vmul.f32 s2 , s0, s5 vmul.f32 s2 , s0, s5
fmacs s2 , s1, s7 fmacs s2 , s1, s7
vmul.f32 s3 , s0, s7 vmul.f32 s3 , s0, s7
vmls.f32 s3 , s1, s5 vmls.f32 s3 , s1, s5
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }


.endm .endm




.macro KERNEL_F1 .macro KERNEL_F1


fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6 fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6 vmul.f32 s3 , s0, s6
vmls.f32 s3 , s1, s4 vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }
vmul.f32 s2 , s0, s5 vmul.f32 s2 , s0, s5
fmacs s2 , s1, s7 fmacs s2 , s1, s7
vmul.f32 s3 , s0, s7 vmul.f32 s3 , s0, s7
vmls.f32 s3 , s1, s5 vmls.f32 s3 , s1, s5
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }




.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4 vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6 fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6 vmul.f32 s3 , s0, s6


+ 38
- 38
kernel/arm/scal_vfp.S View File

@@ -64,30 +64,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4 .macro KERNEL_F4


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmiad X, { d4 - d7 }
vldmia.f64 X, { d4 - d7 }
vmul.f64 d4, d4, d0 vmul.f64 d4, d4, d0
vmul.f64 d5, d5, d0 vmul.f64 d5, d5, d0
vmul.f64 d6, d6, d0 vmul.f64 d6, d6, d0
fstmiad X!, { d4 - d5 }
vstmia.f64 X!, { d4 - d5 }
vmul.f64 d7, d7, d0 vmul.f64 d7, d7, d0
fstmiad X!, { d6 - d7 }
vstmia.f64 X!, { d6 - d7 }


.endm .endm




.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vmul.f64 d4, d4, d0 vmul.f64 d4, d4, d0
fstmiad X!, { d4 }
vstmia.f64 X!, { d4 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vmul.f64 d4, d4, d0 vmul.f64 d4, d4, d0
fstmiad X, { d4 }
vstmia.f64 X, { d4 }
add X, X, INC_X add X, X, INC_X


.endm .endm
@@ -96,30 +96,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F4 .macro KERNEL_F4


fldmias X, { s4 - s7 }
vldmia.f32 X, { s4 - s7 }
vmul.f32 s4, s4, s0 vmul.f32 s4, s4, s0
vmul.f32 s5, s5, s0 vmul.f32 s5, s5, s0
vmul.f32 s6, s6, s0 vmul.f32 s6, s6, s0
fstmias X!, { s4 - s5 }
vstmia.f32 X!, { s4 - s5 }
vmul.f32 s7, s7, s0 vmul.f32 s7, s7, s0
fstmias X!, { s6 - s7 }
vstmia.f32 X!, { s6 - s7 }


.endm .endm




.macro KERNEL_F1 .macro KERNEL_F1


fldmias X, { s4 }
vldmia.f32 X, { s4 }
vmul.f32 s4, s4, s0 vmul.f32 s4, s4, s0
fstmias X!, { s4 }
vstmia.f32 X!, { s4 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 }
vldmia.f32 X, { s4 }
vmul.f32 s4, s4, s0 vmul.f32 s4, s4, s0
fstmias X, { s4 }
vstmia.f32 X, { s4 }
add X, X, INC_X add X, X, INC_X


.endm .endm
@@ -136,58 +136,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ X, #X_PRE ] pld [ X, #X_PRE ]


fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4 vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5 vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5 vmul.f64 d3, d0, d5
fmacd d3, d1, d4 fmacd d3, d1, d4
fstmiad X!, { d2 - d3 }
vstmia.f64 X!, { d2 - d3 }


fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4 vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5 vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5 vmul.f64 d3, d0, d5
fmacd d3, d1, d4 fmacd d3, d1, d4
fstmiad X!, { d2 - d3 }
vstmia.f64 X!, { d2 - d3 }


pld [ X, #X_PRE ] pld [ X, #X_PRE ]


fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4 vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5 vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5 vmul.f64 d3, d0, d5
fmacd d3, d1, d4 fmacd d3, d1, d4
fstmiad X!, { d2 - d3 }
vstmia.f64 X!, { d2 - d3 }


fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4 vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5 vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5 vmul.f64 d3, d0, d5
fmacd d3, d1, d4 fmacd d3, d1, d4
fstmiad X!, { d2 - d3 }
vstmia.f64 X!, { d2 - d3 }


.endm .endm




.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4 vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5 vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5 vmul.f64 d3, d0, d5
fmacd d3, d1, d4 fmacd d3, d1, d4
fstmiad X!, { d2 - d3 }
vstmia.f64 X!, { d2 - d3 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4 vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5 vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5 vmul.f64 d3, d0, d5
fmacd d3, d1, d4 fmacd d3, d1, d4
fstmiad X, { d2 - d3 }
vstmia.f64 X, { d2 - d3 }
add X, X, INC_X add X, X, INC_X


.endm .endm
@@ -199,56 +199,56 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ X, #X_PRE ] pld [ X, #X_PRE ]


fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4 vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5 vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5 vmul.f32 s3, s0, s5
fmacs s3, s1, s4 fmacs s3, s1, s4
fstmias X!, { s2 - s3 }
vstmia.f32 X!, { s2 - s3 }


fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4 vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5 vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5 vmul.f32 s3, s0, s5
fmacs s3, s1, s4 fmacs s3, s1, s4
fstmias X!, { s2 - s3 }
vstmia.f32 X!, { s2 - s3 }


fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4 vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5 vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5 vmul.f32 s3, s0, s5
fmacs s3, s1, s4 fmacs s3, s1, s4
fstmias X!, { s2 - s3 }
vstmia.f32 X!, { s2 - s3 }


fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4 vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5 vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5 vmul.f32 s3, s0, s5
fmacs s3, s1, s4 fmacs s3, s1, s4
fstmias X!, { s2 - s3 }
vstmia.f32 X!, { s2 - s3 }


.endm .endm




.macro KERNEL_F1 .macro KERNEL_F1


fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4 vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5 vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5 vmul.f32 s3, s0, s5
fmacs s3, s1, s4 fmacs s3, s1, s4
fstmias X!, { s2 - s3 }
vstmia.f32 X!, { s2 - s3 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4 vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5 vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5 vmul.f32 s3, s0, s5
fmacs s3, s1, s4 fmacs s3, s1, s4
fstmias X, { s2 - s3 }
vstmia.f32 X, { s2 - s3 }
add X, X, INC_X add X, X, INC_X


.endm .endm


+ 16
- 16
kernel/arm/scopy_vfp.S View File

@@ -65,17 +65,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_F8 .macro COPY_F8


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
fldmias X!, { s0 - s3 }
fldmias X!, { s4 - s7 }
fstmias Y!, { s0 - s3 }
fstmias Y!, { s4 - s7 }
vldmia.f32 X!, { s0 - s3 }
vldmia.f32 X!, { s4 - s7 }
vstmia.f32 Y!, { s0 - s3 }
vstmia.f32 Y!, { s4 - s7 }


.endm .endm


.macro COPY_F1 .macro COPY_F1


fldmias X!, { s0 }
fstmias Y!, { s0 }
vldmia.f32 X!, { s0 }
vstmia.f32 Y!, { s0 }


.endm .endm


@@ -85,23 +85,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_S4 .macro COPY_S4


nop nop
fldmias X, { s0 }
fstmias Y, { s0 }
vldmia.f32 X, { s0 }
vstmia.f32 Y, { s0 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s1 }
fstmias Y, { s1 }
vldmia.f32 X, { s1 }
vstmia.f32 Y, { s1 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s0 }
fstmias Y, { s0 }
vldmia.f32 X, { s0 }
vstmia.f32 Y, { s0 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s1 }
fstmias Y, { s1 }
vldmia.f32 X, { s1 }
vstmia.f32 Y, { s1 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


@@ -110,8 +110,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY_S1 .macro COPY_S1


fldmias X, { s0 }
fstmias Y, { s0 }
vldmia.f32 X, { s0 }
vstmia.f32 Y, { s0 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y




+ 36
- 36
kernel/arm/sdot_vfp.S View File

@@ -68,26 +68,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F4 .macro KERNEL_F4


fldmias X!, { s14 }
fldmias Y!, { s15 }
vldmia.f32 X!, { s14 }
vldmia.f32 Y!, { s15 }
vmul.f32 s15, s14, s15 vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15 vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4


fldmias X!, { s14 }
fldmias Y!, { s15 }
vldmia.f32 X!, { s14 }
vldmia.f32 Y!, { s15 }
vmul.f32 s15, s14, s15 vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15 vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4


fldmias X!, { s14 }
fldmias Y!, { s15 }
vldmia.f32 X!, { s14 }
vldmia.f32 Y!, { s15 }
vmul.f32 s15, s14, s15 vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15 vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4


fldmias X!, { s14 }
fldmias Y!, { s15 }
vldmia.f32 X!, { s14 }
vldmia.f32 Y!, { s15 }
vmul.f32 s15, s14, s15 vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15 vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
@@ -96,8 +96,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s14 }
fldmias Y!, { s15 }
vldmia.f32 X!, { s14 }
vldmia.f32 Y!, { s15 }
vmul.f32 s15, s14, s15 vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15 vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
@@ -109,32 +109,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


nop nop


fldmias X, { s14 }
fldmias Y, { s15 }
vldmia.f32 X, { s14 }
vldmia.f32 Y, { s15 }
vmul.f32 s15, s14, s15 vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15 vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s14 }
fldmias Y, { s15 }
vldmia.f32 X, { s14 }
vldmia.f32 Y, { s15 }
vmul.f32 s15, s14, s15 vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15 vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s14 }
fldmias Y, { s15 }
vldmia.f32 X, { s14 }
vldmia.f32 Y, { s15 }
vmul.f32 s15, s14, s15 vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15 vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmias X, { s14 }
fldmias Y, { s15 }
vldmia.f32 X, { s14 }
vldmia.f32 Y, { s15 }
vmul.f32 s15, s14, s15 vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15 vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
@@ -146,8 +146,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s14 }
fldmias Y, { s15 }
vldmia.f32 X, { s14 }
vldmia.f32 Y, { s15 }
vmul.f32 s15, s14, s15 vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15 vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4 vadd.f64 d0 , d0, d4
@@ -162,12 +162,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F4 .macro KERNEL_F4


fldmias X!, { s8 - s9 }
fldmias Y!, { s4 - s5}
vldmia.f32 X!, { s8 - s9 }
vldmia.f32 Y!, { s4 - s5}
fmacs s0 , s4, s8 fmacs s0 , s4, s8
fldmias X!, { s10 - s11 }
vldmia.f32 X!, { s10 - s11 }
fmacs s1 , s5, s9 fmacs s1 , s5, s9
fldmias Y!, { s6 - s7 }
vldmia.f32 Y!, { s6 - s7 }
fmacs s0 , s6, s10 fmacs s0 , s6, s10
fmacs s1 , s7, s11 fmacs s1 , s7, s11


@@ -175,8 +175,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X!, { s4 }
fldmias Y!, { s8 }
vldmia.f32 X!, { s4 }
vldmia.f32 Y!, { s8 }
fmacs s0 , s4, s8 fmacs s0 , s4, s8


.endm .endm
@@ -185,26 +185,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S4 .macro KERNEL_S4


nop nop
fldmias X, { s4 }
fldmias Y, { s8 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s8 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
fmacs s0 , s4, s8 fmacs s0 , s4, s8


fldmias X, { s5 }
fldmias Y, { s9 }
vldmia.f32 X, { s5 }
vldmia.f32 Y, { s9 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
fmacs s1 , s5, s9 fmacs s1 , s5, s9


fldmias X, { s6 }
fldmias Y, { s10 }
vldmia.f32 X, { s6 }
vldmia.f32 Y, { s10 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
fmacs s0 , s6, s10 fmacs s0 , s6, s10


fldmias X, { s7 }
fldmias Y, { s11 }
vldmia.f32 X, { s7 }
vldmia.f32 Y, { s11 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y
fmacs s1 , s7, s11 fmacs s1 , s7, s11
@@ -214,8 +214,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s4 }
fldmias Y, { s8 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s8 }
add X, X, INC_X add X, X, INC_X
fmacs s0 , s4, s8 fmacs s0 , s4, s8
add Y, Y, INC_Y add Y, Y, INC_Y


+ 2
- 2
kernel/arm/sgemm_kernel_4x2_vfp.S View File

@@ -112,8 +112,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL4x2_SUB .macro KERNEL4x2_SUB


fldmias AO! , { s0 - s3 }
fldmias BO! , { s4 - s5 }
vldmia.f32 AO! , { s0 - s3 }
vldmia.f32 BO! , { s4 - s5 }


fmacs s8 , s0, s4 fmacs s8 , s0, s4
fmacs s9 , s1, s4 fmacs s9 , s1, s4


+ 20
- 20
kernel/arm/sgemm_kernel_4x4_vfpv3.S View File

@@ -136,29 +136,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL4x4_I .macro KERNEL4x4_I


pld [ AO , #A_PRE ] pld [ AO , #A_PRE ]
fldmias AO!, { s0 - s1 }
vldmia.f32 AO!, { s0 - s1 }
pld [ BO , #B_PRE ] pld [ BO , #B_PRE ]
fldmias BO!, { s8 - s9 }
vldmia.f32 BO!, { s8 - s9 }


fmuls s16 , s0, s8 fmuls s16 , s0, s8
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmuls s17 , s1, s8 fmuls s17 , s1, s8
fmuls s18 , s2, s8 fmuls s18 , s2, s8
fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmuls s19 , s3, s8 fmuls s19 , s3, s8


fmuls s20 , s0, s9 fmuls s20 , s0, s9
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmuls s21 , s1, s9 fmuls s21 , s1, s9
fmuls s22 , s2, s9 fmuls s22 , s2, s9
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmuls s23 , s3, s9 fmuls s23 , s3, s9


fmuls s24 , s0, s10 fmuls s24 , s0, s10
fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmuls s25 , s1, s10 fmuls s25 , s1, s10
fmuls s26 , s2, s10 fmuls s26 , s2, s10
fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmuls s27 , s3, s10 fmuls s27 , s3, s10


fmuls s28 , s0, s11 fmuls s28 , s0, s11
@@ -174,20 +174,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ AO , #A_PRE ] pld [ AO , #A_PRE ]
fmacs s16 , s4, s12 fmacs s16 , s4, s12
fmacs s17 , s5, s12 fmacs s17 , s5, s12
fldmias AO!, { s0 - s3 }
vldmia.f32 AO!, { s0 - s3 }
fmacs s18 , s6, s12 fmacs s18 , s6, s12
pld [ BO , #B_PRE ] pld [ BO , #B_PRE ]
fmacs s19 , s7, s12 fmacs s19 , s7, s12


fmacs s20 , s4, s13 fmacs s20 , s4, s13
fldmias BO!, { s8 - s11 }
vldmia.f32 BO!, { s8 - s11 }
fmacs s21 , s5, s13 fmacs s21 , s5, s13
fmacs s22 , s6, s13 fmacs s22 , s6, s13
//fldmias AO!, { s2 - s3 }
//vldmia.f32 AO!, { s2 - s3 }
fmacs s23 , s7, s13 fmacs s23 , s7, s13


fmacs s24 , s4, s14 fmacs s24 , s4, s14
//fldmias BO!, { s10 - s11 }
//vldmia.f32 BO!, { s10 - s11 }
fmacs s25 , s5, s14 fmacs s25 , s5, s14
fmacs s26 , s6, s14 fmacs s26 , s6, s14
fmacs s27 , s7, s14 fmacs s27 , s7, s14
@@ -203,17 +203,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL4x4_M1 .macro KERNEL4x4_M1


fmacs s16 , s0, s8 fmacs s16 , s0, s8
fldmias AO!, { s4 - s7 }
vldmia.f32 AO!, { s4 - s7 }
fmacs s17 , s1, s8 fmacs s17 , s1, s8
fmacs s18 , s2, s8 fmacs s18 , s2, s8
fldmias BO!, { s12 - s15 }
//fldmias AO!, { s6 - s7 }
vldmia.f32 BO!, { s12 - s15 }
//vldmia.f32 AO!, { s6 - s7 }
fmacs s19 , s3, s8 fmacs s19 , s3, s8


fmacs s20 , s0, s9 fmacs s20 , s0, s9
fmacs s21 , s1, s9 fmacs s21 , s1, s9
fmacs s22 , s2, s9 fmacs s22 , s2, s9
//fldmias BO!, { s14 - s15 }
//vldmia.f32 BO!, { s14 - s15 }
fmacs s23 , s3, s9 fmacs s23 , s3, s9


fmacs s24 , s0, s10 fmacs s24 , s0, s10
@@ -300,7 +300,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA flds s0, ALPHA
add r4 , CO2, r3 add r4 , CO2, r3


fldmias CO1, { s8 - s11 }
vldmia.f32 CO1, { s8 - s11 }


fmacs s8 , s0 , s16 fmacs s8 , s0 , s16
flds s12, [CO2] flds s12, [CO2]
@@ -322,7 +322,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ CO1 , #C_PRE ] pld [ CO1 , #C_PRE ]


fldmias r4, { s8 - s11 }
vldmia.f32 r4, { s8 - s11 }


fmacs s8 , s0 , s24 fmacs s8 , s0 , s24
fsts s12, [CO2] fsts s12, [CO2]
@@ -338,7 +338,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add CO2, r4 , r3 add CO2, r4 , r3




fldmias CO2, { s12 - s15 }
vldmia.f32 CO2, { s12 - s15 }


fsts s8 , [r4 ] fsts s8 , [r4 ]
fmacs s12, s0 , s28 fmacs s12, s0 , s28
@@ -350,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmacs s15, s0 , s31 fmacs s15, s0 , s31


pld [ r4 , #C_PRE ] pld [ r4 , #C_PRE ]
fstmias CO2, { s12 - s15 }
vstmia.f32 CO2, { s12 - s15 }
pld [ CO2 , #C_PRE ] pld [ CO2 , #C_PRE ]


add CO1, CO1, #16 add CO1, CO1, #16


+ 35
- 35
kernel/arm/sgemm_tcopy_4_vfp.S View File

@@ -76,21 +76,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY4x4_1 .macro COPY4x4_1


pld [ AO1, #A_PRE ] pld [ AO1, #A_PRE ]
fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }


add r3, AO1, LDA add r3, AO1, LDA
pld [ r3, #A_PRE ] pld [ r3, #A_PRE ]
fldmias r3, { s4 - s7 }
vldmia.f32 r3, { s4 - s7 }


add r3, r3, LDA add r3, r3, LDA
pld [ r3, #A_PRE ] pld [ r3, #A_PRE ]
fldmias r3, { s8 - s11 }
vldmia.f32 r3, { s8 - s11 }


add r3, r3, LDA add r3, r3, LDA
pld [ r3, #A_PRE ] pld [ r3, #A_PRE ]
fldmias r3, { s12 - s15 }
vldmia.f32 r3, { s12 - s15 }


fstmias BO1, { s0 - s15 }
vstmia.f32 BO1, { s0 - s15 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO1, BO1, M4 add BO1, BO1, M4


@@ -98,18 +98,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY4x4_2 .macro COPY4x4_2


fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }


add r3, AO1, LDA add r3, AO1, LDA
fldmias r3, { s4 - s7 }
vldmia.f32 r3, { s4 - s7 }


add r3, r3, LDA add r3, r3, LDA
fldmias r3, { s8 - s11 }
vldmia.f32 r3, { s8 - s11 }


add r3, r3, LDA add r3, r3, LDA
fldmias r3, { s12 - s15 }
vldmia.f32 r3, { s12 - s15 }


fstmias BO1, { s0 - s15 }
vstmia.f32 BO1, { s0 - s15 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO1, BO1, M4 add BO1, BO1, M4


@@ -118,18 +118,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY2x4 .macro COPY2x4


fldmias AO1, { s0 - s1 }
vldmia.f32 AO1, { s0 - s1 }


add r3, AO1, LDA add r3, AO1, LDA
fldmias r3, { s2 - s3 }
vldmia.f32 r3, { s2 - s3 }


add r3, r3, LDA add r3, r3, LDA
fldmias r3, { s4 - s5 }
vldmia.f32 r3, { s4 - s5 }


add r3, r3, LDA add r3, r3, LDA
fldmias r3, { s6 - s7 }
vldmia.f32 r3, { s6 - s7 }


fstmias BO2, { s0 - s7 }
vstmia.f32 BO2, { s0 - s7 }
add AO1, AO1, #8 add AO1, AO1, #8
add BO2, BO2, #32 add BO2, BO2, #32


@@ -137,18 +137,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY1x4 .macro COPY1x4


fldmias AO1, { s0 }
vldmia.f32 AO1, { s0 }


add r3, AO1, LDA add r3, AO1, LDA
fldmias r3, { s1 }
vldmia.f32 r3, { s1 }


add r3, r3, LDA add r3, r3, LDA
fldmias r3, { s2 }
vldmia.f32 r3, { s2 }


add r3, r3, LDA add r3, r3, LDA
fldmias r3, { s3 }
vldmia.f32 r3, { s3 }


fstmias BO3, { s0 - s3 }
vstmia.f32 BO3, { s0 - s3 }
add AO1, AO1, #4 add AO1, AO1, #4
add BO3, BO3, #16 add BO3, BO3, #16


@@ -158,12 +158,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY4x2 .macro COPY4x2


fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }


add r3, AO1, LDA add r3, AO1, LDA
fldmias r3, { s4 - s7 }
vldmia.f32 r3, { s4 - s7 }


fstmias BO1, { s0 - s7 }
vstmia.f32 BO1, { s0 - s7 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO1, BO1, M4 add BO1, BO1, M4


@@ -171,12 +171,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY2x2 .macro COPY2x2


fldmias AO1, { s0 - s1 }
vldmia.f32 AO1, { s0 - s1 }


add r3, AO1, LDA add r3, AO1, LDA
fldmias r3, { s2 - s3 }
vldmia.f32 r3, { s2 - s3 }


fstmias BO2, { s0 - s3 }
vstmia.f32 BO2, { s0 - s3 }
add AO1, AO1, #8 add AO1, AO1, #8
add BO2, BO2, #16 add BO2, BO2, #16


@@ -184,12 +184,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY1x2 .macro COPY1x2


fldmias AO1, { s0 }
vldmia.f32 AO1, { s0 }


add r3, AO1, LDA add r3, AO1, LDA
fldmias r3, { s1 }
vldmia.f32 r3, { s1 }


fstmias BO3, { s0 - s1 }
vstmia.f32 BO3, { s0 - s1 }
add AO1, AO1, #4 add AO1, AO1, #4
add BO3, BO3, #8 add BO3, BO3, #8


@@ -199,9 +199,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY4x1 .macro COPY4x1


fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }


fstmias BO1, { s0 - s3 }
vstmia.f32 BO1, { s0 - s3 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO1, BO1, M4 add BO1, BO1, M4


@@ -209,9 +209,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY2x1 .macro COPY2x1


fldmias AO1, { s0 - s1 }
vldmia.f32 AO1, { s0 - s1 }


fstmias BO2, { s0 - s1 }
vstmia.f32 BO2, { s0 - s1 }
add AO1, AO1, #8 add AO1, AO1, #8
add BO2, BO2, #8 add BO2, BO2, #8


@@ -219,9 +219,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY1x1 .macro COPY1x1


fldmias AO1, { s0 }
vldmia.f32 AO1, { s0 }


fstmias BO3, { s0 }
vstmia.f32 BO3, { s0 }
add AO1, AO1, #4 add AO1, AO1, #4
add BO3, BO3, #4 add BO3, BO3, #4




+ 2
- 2
kernel/arm/strmm_kernel_4x2_vfp.S View File

@@ -118,8 +118,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL4x2_SUB .macro KERNEL4x2_SUB


fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s5 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s5 }


fmacs s8 , s0, s4 fmacs s8 , s0, s4
fmacs s9 , s1, s4 fmacs s9 , s1, s4


+ 17
- 17
kernel/arm/strmm_kernel_4x4_vfpv3.S View File

@@ -122,30 +122,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL4x4_I .macro KERNEL4x4_I


fldmias AO!, { s0 - s1 }
vldmia.f32 AO!, { s0 - s1 }
pld [ AO , #A_PRE-8 ] pld [ AO , #A_PRE-8 ]
fldmias BO!, { s8 - s9 }
vldmia.f32 BO!, { s8 - s9 }
pld [ BO , #B_PRE-8 ] pld [ BO , #B_PRE-8 ]


fmuls s16 , s0, s8 fmuls s16 , s0, s8
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmuls s17 , s1, s8 fmuls s17 , s1, s8
fmuls s18 , s2, s8 fmuls s18 , s2, s8
fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmuls s19 , s3, s8 fmuls s19 , s3, s8


fmuls s20 , s0, s9 fmuls s20 , s0, s9
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmuls s21 , s1, s9 fmuls s21 , s1, s9
fmuls s22 , s2, s9 fmuls s22 , s2, s9
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmuls s23 , s3, s9 fmuls s23 , s3, s9


fmuls s24 , s0, s10 fmuls s24 , s0, s10
fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmuls s25 , s1, s10 fmuls s25 , s1, s10
fmuls s26 , s2, s10 fmuls s26 , s2, s10
fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmuls s27 , s3, s10 fmuls s27 , s3, s10


fmuls s28 , s0, s11 fmuls s28 , s0, s11
@@ -161,20 +161,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ AO , #A_PRE ] pld [ AO , #A_PRE ]
fmacs s16 , s4, s12 fmacs s16 , s4, s12
fmacs s17 , s5, s12 fmacs s17 , s5, s12
fldmias AO!, { s0 - s1 }
vldmia.f32 AO!, { s0 - s1 }
fmacs s18 , s6, s12 fmacs s18 , s6, s12
pld [ BO , #B_PRE ] pld [ BO , #B_PRE ]
fmacs s19 , s7, s12 fmacs s19 , s7, s12


fmacs s20 , s4, s13 fmacs s20 , s4, s13
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmacs s21 , s5, s13 fmacs s21 , s5, s13
fmacs s22 , s6, s13 fmacs s22 , s6, s13
fldmias BO!, { s8 - s9 }
vldmia.f32 BO!, { s8 - s9 }
fmacs s23 , s7, s13 fmacs s23 , s7, s13


fmacs s24 , s4, s14 fmacs s24 , s4, s14
fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmacs s25 , s5, s14 fmacs s25 , s5, s14
fmacs s26 , s6, s14 fmacs s26 , s6, s14
fmacs s27 , s7, s14 fmacs s27 , s7, s14
@@ -190,17 +190,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL4x4_M1 .macro KERNEL4x4_M1


fmacs s16 , s0, s8 fmacs s16 , s0, s8
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmacs s17 , s1, s8 fmacs s17 , s1, s8
fmacs s18 , s2, s8 fmacs s18 , s2, s8
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmacs s19 , s3, s8 fmacs s19 , s3, s8


fmacs s20 , s0, s9 fmacs s20 , s0, s9
fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmacs s21 , s1, s9 fmacs s21 , s1, s9
fmacs s22 , s2, s9 fmacs s22 , s2, s9
fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmacs s23 , s3, s9 fmacs s23 , s3, s9


fmacs s24 , s0, s10 fmacs s24 , s0, s10
@@ -325,7 +325,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fsts s11, [r4 , #12 ] fsts s11, [r4 , #12 ]
fmuls s15, s0 , s31 fmuls s15, s0 , s31


fstmias CO2, { s12 - s15 }
vstmia.f32 CO2, { s12 - s15 }


add CO1, CO1, #16 add CO1, CO1, #16




+ 56
- 56
kernel/arm/swap_vfp.S View File

@@ -103,29 +103,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]
fldmiad X, { d0 - d3 }
fldmiad Y, { d4 - d7 }
fstmiad Y!, { d0 - d3 }
fstmiad X!, { d4 - d7}
vldmia.f64 X, { d0 - d3 }
vldmia.f64 Y, { d4 - d7 }
vstmia.f64 Y!, { d0 - d3 }
vstmia.f64 X!, { d4 - d7}


.endm .endm




.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X, { d0 }
fldmiad Y, { d4 }
fstmiad Y!, { d0 }
fstmiad X!, { d4 }
vldmia.f64 X, { d0 }
vldmia.f64 Y, { d4 }
vstmia.f64 Y!, { d0 }
vstmia.f64 X!, { d4 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d0 }
fldmiad Y, { d4 }
fstmiad Y, { d0 }
fstmiad X, { d4 }
vldmia.f64 X, { d0 }
vldmia.f64 Y, { d4 }
vstmia.f64 Y, { d0 }
vstmia.f64 X, { d4 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


@@ -135,29 +135,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F4 .macro KERNEL_F4


fldmias X, { s0 - s3 }
fldmias Y, { s4 - s7 }
fstmias Y!, { s0 - s3 }
fstmias X!, { s4 - s7}
vldmia.f32 X, { s0 - s3 }
vldmia.f32 Y, { s4 - s7 }
vstmia.f32 Y!, { s0 - s3 }
vstmia.f32 X!, { s4 - s7}


.endm .endm




.macro KERNEL_F1 .macro KERNEL_F1


fldmias X, { s0 }
fldmias Y, { s4 }
fstmias Y!, { s0 }
fstmias X!, { s4 }
vldmia.f32 X, { s0 }
vldmia.f32 Y, { s4 }
vstmia.f32 Y!, { s0 }
vstmia.f32 X!, { s4 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s0 }
fldmias Y, { s4 }
fstmias Y, { s0 }
fstmias X, { s4 }
vldmia.f32 X, { s0 }
vldmia.f32 Y, { s4 }
vstmia.f32 Y, { s0 }
vstmia.f32 X, { s4 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


@@ -174,35 +174,35 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]
fldmiad X, { d0 - d3 }
fldmiad Y, { d4 - d7 }
fstmiad Y!, { d0 - d3 }
fstmiad X!, { d4 - d7}
vldmia.f64 X, { d0 - d3 }
vldmia.f64 Y, { d4 - d7 }
vstmia.f64 Y!, { d0 - d3 }
vstmia.f64 X!, { d4 - d7}


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]
fldmiad X, { d0 - d3 }
fldmiad Y, { d4 - d7 }
fstmiad Y!, { d0 - d3 }
fstmiad X!, { d4 - d7}
vldmia.f64 X, { d0 - d3 }
vldmia.f64 Y, { d4 - d7 }
vstmia.f64 Y!, { d0 - d3 }
vstmia.f64 X!, { d4 - d7}


.endm .endm


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X, { d0 - d1 }
fldmiad Y, { d4 - d5 }
fstmiad Y!, { d0 - d1 }
fstmiad X!, { d4 - d5 }
vldmia.f64 X, { d0 - d1 }
vldmia.f64 Y, { d4 - d5 }
vstmia.f64 Y!, { d0 - d1 }
vstmia.f64 X!, { d4 - d5 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d0 - d1 }
fldmiad Y, { d4 - d5 }
fstmiad Y, { d0 - d1 }
fstmiad X, { d4 - d5 }
vldmia.f64 X, { d0 - d1 }
vldmia.f64 Y, { d4 - d5 }
vstmia.f64 Y, { d0 - d1 }
vstmia.f64 X, { d4 - d5 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


@@ -215,33 +215,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]
fldmias X, { s0 - s3 }
fldmias Y, { s4 - s7 }
fstmias Y!, { s0 - s3 }
fstmias X!, { s4 - s7}
vldmia.f32 X, { s0 - s3 }
vldmia.f32 Y, { s4 - s7 }
vstmia.f32 Y!, { s0 - s3 }
vstmia.f32 X!, { s4 - s7}


fldmias X, { s0 - s3 }
fldmias Y, { s4 - s7 }
fstmias Y!, { s0 - s3 }
fstmias X!, { s4 - s7}
vldmia.f32 X, { s0 - s3 }
vldmia.f32 Y, { s4 - s7 }
vstmia.f32 Y!, { s0 - s3 }
vstmia.f32 X!, { s4 - s7}


.endm .endm


.macro KERNEL_F1 .macro KERNEL_F1


fldmias X, { s0 - s1 }
fldmias Y, { s4 - s5 }
fstmias Y!, { s0 - s1 }
fstmias X!, { s4 - s5 }
vldmia.f32 X, { s0 - s1 }
vldmia.f32 Y, { s4 - s5 }
vstmia.f32 Y!, { s0 - s1 }
vstmia.f32 X!, { s4 - s5 }


.endm .endm


.macro KERNEL_S1 .macro KERNEL_S1


fldmias X, { s0 - s1 }
fldmias Y, { s4 - s5 }
fstmias Y, { s0 - s1 }
fstmias X, { s4 - s5 }
vldmia.f32 X, { s0 - s1 }
vldmia.f32 Y, { s4 - s5 }
vstmia.f32 Y, { s0 - s1 }
vstmia.f32 X, { s4 - s5 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y




+ 14
- 14
kernel/arm/zcopy_vfp.S View File

@@ -66,15 +66,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ X, #X_PRE+32 ] pld [ X, #X_PRE+32 ]
fldmiad X!, { d0 - d7 }
fstmiad Y!, { d0 - d7 }
vldmia.f64 X!, { d0 - d7 }
vstmia.f64 Y!, { d0 - d7 }


.endm .endm


.macro COPY_F1 .macro COPY_F1


fldmiad X!, { d0 - d1 }
fstmiad Y!, { d0 - d1 }
vldmia.f64 X!, { d0 - d1 }
vstmia.f64 Y!, { d0 - d1 }


.endm .endm


@@ -84,23 +84,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_S4 .macro COPY_S4


nop nop
fldmiad X, { d0 - d1 }
fstmiad Y, { d0 - d1 }
vldmia.f64 X, { d0 - d1 }
vstmia.f64 Y, { d0 - d1 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmiad X, { d2 - d3 }
fstmiad Y, { d2 - d3 }
vldmia.f64 X, { d2 - d3 }
vstmia.f64 Y, { d2 - d3 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmiad X, { d0 - d1 }
fstmiad Y, { d0 - d1 }
vldmia.f64 X, { d0 - d1 }
vstmia.f64 Y, { d0 - d1 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmiad X, { d2 - d3 }
fstmiad Y, { d2 - d3 }
vldmia.f64 X, { d2 - d3 }
vstmia.f64 Y, { d2 - d3 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


@@ -109,8 +109,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY_S1 .macro COPY_S1


fldmiad X, { d0 - d1 }
fstmiad Y, { d0 - d1 }
vldmia.f64 X, { d0 - d1 }
vstmia.f64 Y, { d0 - d1 }
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y




+ 20
- 20
kernel/arm/zdot_vfp.S View File

@@ -76,15 +76,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ X, #X_PRE ] pld [ X, #X_PRE ]
pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]


fldmiad X!, { d4 - d5 }
fldmiad Y!, { d8 - d9 }
vldmia.f64 X!, { d4 - d5 }
vldmia.f64 Y!, { d8 - d9 }
fmacd d0 , d4, d8 fmacd d0 , d4, d8
fmacd d1 , d4, d9 fmacd d1 , d4, d9
fldmiad X!, { d6 - d7 }
vldmia.f64 X!, { d6 - d7 }
fmacd d2 , d5, d9 fmacd d2 , d5, d9
fmacd d3 , d5, d8 fmacd d3 , d5, d8


fldmiad Y!, { d10 - d11 }
vldmia.f64 Y!, { d10 - d11 }
fmacd d0 , d6, d10 fmacd d0 , d6, d10
fmacd d1 , d6, d11 fmacd d1 , d6, d11
pld [ X, #X_PRE ] pld [ X, #X_PRE ]
@@ -93,15 +93,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ Y, #X_PRE ] pld [ Y, #X_PRE ]


fldmiad X!, { d4 - d5 }
fldmiad Y!, { d8 - d9 }
vldmia.f64 X!, { d4 - d5 }
vldmia.f64 Y!, { d8 - d9 }
fmacd d0 , d4, d8 fmacd d0 , d4, d8
fmacd d1 , d4, d9 fmacd d1 , d4, d9
fldmiad X!, { d6 - d7 }
vldmia.f64 X!, { d6 - d7 }
fmacd d2 , d5, d9 fmacd d2 , d5, d9
fmacd d3 , d5, d8 fmacd d3 , d5, d8


fldmiad Y!, { d10 - d11 }
vldmia.f64 Y!, { d10 - d11 }
fmacd d0 , d6, d10 fmacd d0 , d6, d10
fmacd d1 , d6, d11 fmacd d1 , d6, d11
fmacd d2 , d7, d11 fmacd d2 , d7, d11
@@ -111,8 +111,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1 .macro KERNEL_F1


fldmiad X!, { d4 - d5 }
fldmiad Y!, { d8 - d9 }
vldmia.f64 X!, { d4 - d5 }
vldmia.f64 Y!, { d8 - d9 }
fmacd d0 , d4, d8 fmacd d0 , d4, d8
fmacd d1 , d4, d9 fmacd d1 , d4, d9
fmacd d2 , d5, d9 fmacd d2 , d5, d9
@@ -127,8 +127,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


nop nop


fldmiad X, { d4 - d5 }
fldmiad Y, { d8 - d9 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d8 - d9 }
fmacd d0 , d4, d8 fmacd d0 , d4, d8
fmacd d1 , d4, d9 fmacd d1 , d4, d9
fmacd d2 , d5, d9 fmacd d2 , d5, d9
@@ -136,8 +136,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmiad X, { d4 - d5 }
fldmiad Y, { d8 - d9 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d8 - d9 }
fmacd d0 , d4, d8 fmacd d0 , d4, d8
fmacd d1 , d4, d9 fmacd d1 , d4, d9
fmacd d2 , d5, d9 fmacd d2 , d5, d9
@@ -145,8 +145,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmiad X, { d4 - d5 }
fldmiad Y, { d8 - d9 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d8 - d9 }
fmacd d0 , d4, d8 fmacd d0 , d4, d8
fmacd d1 , d4, d9 fmacd d1 , d4, d9
fmacd d2 , d5, d9 fmacd d2 , d5, d9
@@ -154,8 +154,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X add X, X, INC_X
add Y, Y, INC_Y add Y, Y, INC_Y


fldmiad X, { d4 - d5 }
fldmiad Y, { d8 - d9 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d8 - d9 }
fmacd d0 , d4, d8 fmacd d0 , d4, d8
fmacd d1 , d4, d9 fmacd d1 , d4, d9
fmacd d2 , d5, d9 fmacd d2 , d5, d9
@@ -168,8 +168,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1 .macro KERNEL_S1


fldmiad X, { d4 - d5 }
fldmiad Y, { d8 - d9 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d8 - d9 }
fmacd d0 , d4, d8 fmacd d0 , d4, d8
fmacd d1 , d4, d9 fmacd d1 , d4, d9
fmacd d2 , d5, d9 fmacd d2 , d5, d9


+ 12
- 12
kernel/arm/zgemm_kernel_2x2_vfp.S View File

@@ -360,7 +360,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad CO1, { d4 - d7 }
vldmia.f64 CO1, { d4 - d7 }


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
@@ -372,9 +372,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d11 FMAC_R2 d6 , d1 , d11
FMAC_I2 d7 , d1 , d10 FMAC_I2 d7 , d1 , d10


fstmiad CO1, { d4 - d7 }
vstmia.f64 CO1, { d4 - d7 }


fldmiad CO2, { d4 - d7 }
vldmia.f64 CO2, { d4 - d7 }


FMAC_R1 d4 , d0 , d12 FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13 FMAC_I1 d5 , d0 , d13
@@ -386,7 +386,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d15 FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14 FMAC_I2 d7 , d1 , d14


fstmiad CO2, { d4 - d7 }
vstmia.f64 CO2, { d4 - d7 }


add CO1, CO1, #32 add CO1, CO1, #32


@@ -543,23 +543,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad CO1, { d4 - d5 }
vldmia.f64 CO1, { d4 - d5 }


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
FMAC_R2 d4 , d1 , d9 FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8 FMAC_I2 d5 , d1 , d8


fstmiad CO1, { d4 - d5 }
vstmia.f64 CO1, { d4 - d5 }


fldmiad CO2, { d4 - d5 }
vldmia.f64 CO2, { d4 - d5 }


FMAC_R1 d4 , d0 , d12 FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13 FMAC_I1 d5 , d0 , d13
FMAC_R2 d4 , d1 , d13 FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12 FMAC_I2 d5 , d1 , d12


fstmiad CO2, { d4 - d5 }
vstmia.f64 CO2, { d4 - d5 }


add CO1, CO1, #16 add CO1, CO1, #16


@@ -714,7 +714,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad CO1, { d4 - d7 }
vldmia.f64 CO1, { d4 - d7 }


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
@@ -726,7 +726,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d11 FMAC_R2 d6 , d1 , d11
FMAC_I2 d7 , d1 , d10 FMAC_I2 d7 , d1 , d10


fstmiad CO1, { d4 - d7 }
vstmia.f64 CO1, { d4 - d7 }


add CO1, CO1, #32 add CO1, CO1, #32


@@ -843,14 +843,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad CO1, { d4 - d5 }
vldmia.f64 CO1, { d4 - d5 }


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
FMAC_R2 d4 , d1 , d9 FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8 FMAC_I2 d5 , d1 , d8


fstmiad CO1, { d4 - d5 }
vstmia.f64 CO1, { d4 - d5 }


add CO1, CO1, #16 add CO1, CO1, #16




+ 12
- 12
kernel/arm/zgemm_kernel_2x2_vfpv3.S View File

@@ -374,8 +374,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad CO1, { d4 - d7 }
fldmiad CO2, { d8 - d11 }
vldmia.f64 CO1, { d4 - d7 }
vldmia.f64 CO2, { d8 - d11 }


FADD_R d16, d24 , d16 FADD_R d16, d24 , d16
FADD_I d17, d25 , d17 FADD_I d17, d25 , d17
@@ -406,8 +406,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d10, d1 , d23 FMAC_R2 d10, d1 , d23
FMAC_I2 d11, d1 , d22 FMAC_I2 d11, d1 , d22


fstmiad CO1, { d4 - d7 }
fstmiad CO2, { d8 - d11 }
vstmia.f64 CO1, { d4 - d7 }
vstmia.f64 CO2, { d8 - d11 }


add CO1, CO1, #32 add CO1, CO1, #32


@@ -570,8 +570,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad CO1, { d4 - d5 }
fldmiad CO2, { d8 - d9 }
vldmia.f64 CO1, { d4 - d5 }
vldmia.f64 CO2, { d8 - d9 }


FADD_R d16, d24 , d16 FADD_R d16, d24 , d16
FADD_I d17, d25 , d17 FADD_I d17, d25 , d17
@@ -588,8 +588,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d8 , d1 , d21 FMAC_R2 d8 , d1 , d21
FMAC_I2 d9 , d1 , d20 FMAC_I2 d9 , d1 , d20


fstmiad CO1, { d4 - d5 }
fstmiad CO2, { d8 - d9 }
vstmia.f64 CO1, { d4 - d5 }
vstmia.f64 CO2, { d8 - d9 }


add CO1, CO1, #16 add CO1, CO1, #16


@@ -752,7 +752,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad CO1, { d4 - d7 }
vldmia.f64 CO1, { d4 - d7 }


FADD_R d16, d24 , d16 FADD_R d16, d24 , d16
FADD_I d17, d25 , d17 FADD_I d17, d25 , d17
@@ -769,7 +769,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d19 FMAC_R2 d6 , d1 , d19
FMAC_I2 d7 , d1 , d18 FMAC_I2 d7 , d1 , d18


fstmiad CO1, { d4 - d7 }
vstmia.f64 CO1, { d4 - d7 }


add CO1, CO1, #32 add CO1, CO1, #32


@@ -887,7 +887,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad CO1, { d4 - d5 }
vldmia.f64 CO1, { d4 - d5 }


FADD_R d16, d24 , d16 FADD_R d16, d24 , d16
FADD_I d17, d25 , d17 FADD_I d17, d25 , d17
@@ -897,7 +897,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d4 , d1 , d17 FMAC_R2 d4 , d1 , d17
FMAC_I2 d5 , d1 , d16 FMAC_I2 d5 , d1 , d16


fstmiad CO1, { d4 - d5 }
vstmia.f64 CO1, { d4 - d5 }


add CO1, CO1, #16 add CO1, CO1, #16




+ 10
- 10
kernel/arm/zgemm_tcopy_2_vfp.S View File

@@ -74,13 +74,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY2x2 .macro COPY2x2


pld [ AO1, #A_PRE ] pld [ AO1, #A_PRE ]
fldmiad AO1, { d0 - d3 }
vldmia.f64 AO1, { d0 - d3 }


add r3, AO1, LDA add r3, AO1, LDA
pld [ r3, #A_PRE ] pld [ r3, #A_PRE ]
fldmiad r3, { d4 - d7 }
vldmia.f64 r3, { d4 - d7 }


fstmiad BO1, { d0 - d7 }
vstmia.f64 BO1, { d0 - d7 }
add AO1, AO1, #32 add AO1, AO1, #32
add BO1, BO1, M4 add BO1, BO1, M4


@@ -88,12 +88,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY1x2 .macro COPY1x2


fldmiad AO1, { d0 -d1 }
vldmia.f64 AO1, { d0 -d1 }


add r3, AO1, LDA add r3, AO1, LDA
fldmiad r3, { d2 - d3 }
vldmia.f64 r3, { d2 - d3 }


fstmiad BO2, { d0 - d3 }
vstmia.f64 BO2, { d0 - d3 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO2, BO2, #32 add BO2, BO2, #32


@@ -102,9 +102,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*************************************************************************************************************************/ /*************************************************************************************************************************/
.macro COPY2x1 .macro COPY2x1


fldmiad AO1, { d0 - d3 }
vldmia.f64 AO1, { d0 - d3 }


fstmiad BO1, { d0 - d3 }
vstmia.f64 BO1, { d0 - d3 }
add AO1, AO1, #32 add AO1, AO1, #32
add BO1, BO1, M4 add BO1, BO1, M4


@@ -112,9 +112,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro COPY1x1 .macro COPY1x1


fldmiad AO1, { d0 - d1 }
vldmia.f64 AO1, { d0 - d1 }


fstmiad BO2, { d0 - d1 }
vstmia.f64 BO2, { d0 - d1 }
add AO1, AO1, #16 add AO1, AO1, #16
add BO2, BO2, #16 add BO2, BO2, #16




+ 16
- 16
kernel/arm/zgemv_n_vfp.S View File

@@ -204,7 +204,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad YO, { d4 - d7 }
vldmia.f64 YO, { d4 - d7 }


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
@@ -216,9 +216,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d11 FMAC_R2 d6 , d1 , d11
FMAC_I2 d7 , d1 , d10 FMAC_I2 d7 , d1 , d10


fstmiad YO!, { d4 - d7 }
vstmia.f64 YO!, { d4 - d7 }


fldmiad YO, { d4 - d7 }
vldmia.f64 YO, { d4 - d7 }


FMAC_R1 d4 , d0 , d12 FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13 FMAC_I1 d5 , d0 , d13
@@ -230,7 +230,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d15 FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14 FMAC_I2 d7 , d1 , d14


fstmiad YO!, { d4 - d7 }
vstmia.f64 YO!, { d4 - d7 }


.endm .endm


@@ -269,14 +269,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
FMAC_R2 d4 , d1 , d9 FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8 FMAC_I2 d5 , d1 , d8


fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }


add YO, YO, #16 add YO, YO, #16


@@ -352,47 +352,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
FMAC_R2 d4 , d1 , d9 FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8 FMAC_I2 d5 , d1 , d8


fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }


add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d6 - d7 }
vldmia.f64 YO, { d6 - d7 }


FMAC_R1 d6 , d0 , d10 FMAC_R1 d6 , d0 , d10
FMAC_I1 d7 , d0 , d11 FMAC_I1 d7 , d0 , d11
FMAC_R2 d6 , d1 , d11 FMAC_R2 d6 , d1 , d11
FMAC_I2 d7 , d1 , d10 FMAC_I2 d7 , d1 , d10


fstmiad YO, { d6 - d7 }
vstmia.f64 YO, { d6 - d7 }


add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }


FMAC_R1 d4 , d0 , d12 FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13 FMAC_I1 d5 , d0 , d13
FMAC_R2 d4 , d1 , d13 FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12 FMAC_I2 d5 , d1 , d12


fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }


add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d6 - d7 }
vldmia.f64 YO, { d6 - d7 }


FMAC_R1 d6 , d0 , d14 FMAC_R1 d6 , d0 , d14
FMAC_I1 d7 , d0 , d15 FMAC_I1 d7 , d0 , d15
FMAC_R2 d6 , d1 , d15 FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14 FMAC_I2 d7 , d1 , d14


fstmiad YO, { d6 - d7 }
vstmia.f64 YO, { d6 - d7 }


add YO, YO, INC_Y add YO, YO, INC_Y


@@ -433,14 +433,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
FMAC_R2 d4 , d1 , d9 FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8 FMAC_I2 d5 , d1 , d8


fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }


add YO, YO, INC_Y add YO, YO, INC_Y




+ 20
- 20
kernel/arm/zgemv_t_vfp.S View File

@@ -151,12 +151,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F2X1 .macro KERNEL_F2X1


fldmiad XO! , { d2 - d3 }
fldmiad AO1!, { d4 - d5 }
vldmia.f64 XO! , { d2 - d3 }
vldmia.f64 AO1!, { d4 - d5 }


fmacd d12 , d4 , d2 fmacd d12 , d4 , d2
fmacd d13 , d4 , d3 fmacd d13 , d4 , d3
fldmiad AO2!, { d8 - d9 }
vldmia.f64 AO2!, { d8 - d9 }
KMAC_R d12 , d5 , d3 KMAC_R d12 , d5 , d3
KMAC_I d13 , d5 , d2 KMAC_I d13 , d5 , d2


@@ -169,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F2 .macro SAVE_F2


fldmiad YO, { d4 - d7 }
vldmia.f64 YO, { d4 - d7 }


FMAC_R1 d4 , d0 , d12 FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13 FMAC_I1 d5 , d0 , d13
@@ -181,7 +181,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d15 FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14 FMAC_I2 d7 , d1 , d14


fstmiad YO!, { d4 - d7 }
vstmia.f64 YO!, { d4 - d7 }


.endm .endm


@@ -205,8 +205,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_F1X1 .macro KERNEL_F1X1


fldmiad XO! , { d2 - d3 }
fldmiad AO1!, { d4 - d5 }
vldmia.f64 XO! , { d2 - d3 }
vldmia.f64 AO1!, { d4 - d5 }


fmacd d12 , d4 , d2 fmacd d12 , d4 , d2
fmacd d13 , d4 , d3 fmacd d13 , d4 , d3
@@ -217,14 +217,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_F1 .macro SAVE_F1


fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }


FMAC_R1 d4 , d0 , d12 FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13 FMAC_I1 d5 , d0 , d13
FMAC_R2 d4 , d1 , d13 FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12 FMAC_I2 d5 , d1 , d12


fstmiad YO!, { d4 - d5 }
vstmia.f64 YO!, { d4 - d5 }


.endm .endm


@@ -250,9 +250,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S2X1 .macro KERNEL_S2X1


fldmiad XO , { d2 - d3 }
fldmiad AO1!, { d4 - d5 }
fldmiad AO2!, { d8 - d9 }
vldmia.f64 XO , { d2 - d3 }
vldmia.f64 AO1!, { d4 - d5 }
vldmia.f64 AO2!, { d8 - d9 }


fmacd d12 , d4 , d2 fmacd d12 , d4 , d2
fmacd d13 , d4 , d3 fmacd d13 , d4 , d3
@@ -270,25 +270,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S2 .macro SAVE_S2


fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }


FMAC_R1 d4 , d0 , d12 FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13 FMAC_I1 d5 , d0 , d13
FMAC_R2 d4 , d1 , d13 FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12 FMAC_I2 d5 , d1 , d12


fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }


add YO, YO, INC_Y add YO, YO, INC_Y


fldmiad YO, { d6 - d7 }
vldmia.f64 YO, { d6 - d7 }


FMAC_R1 d6 , d0 , d14 FMAC_R1 d6 , d0 , d14
FMAC_I1 d7 , d0 , d15 FMAC_I1 d7 , d0 , d15
FMAC_R2 d6 , d1 , d15 FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14 FMAC_I2 d7 , d1 , d14


fstmiad YO, { d6 - d7 }
vstmia.f64 YO, { d6 - d7 }


add YO, YO, INC_Y add YO, YO, INC_Y


@@ -314,8 +314,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro KERNEL_S1X1 .macro KERNEL_S1X1


fldmiad XO , { d2 - d3 }
fldmiad AO1!, { d4 - d5 }
vldmia.f64 XO , { d2 - d3 }
vldmia.f64 AO1!, { d4 - d5 }


fmacd d12 , d4 , d2 fmacd d12 , d4 , d2
fmacd d13 , d4 , d3 fmacd d13 , d4 , d3
@@ -328,14 +328,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro SAVE_S1 .macro SAVE_S1


fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }


FMAC_R1 d4 , d0 , d12 FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13 FMAC_I1 d5 , d0 , d13
FMAC_R2 d4 , d1 , d13 FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12 FMAC_I2 d5 , d1 , d12


fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }


add YO, YO, INC_Y add YO, YO, INC_Y




Loading…
Cancel
Save