Browse Source

updated trmm_kernels for armv6

tags/v0.2.16.rc1
Werner Saar 10 years ago
parent
commit
c65357c566
4 changed files with 89 additions and 56 deletions
  1. +31
    -22
      kernel/arm/ctrmm_kernel_2x2_vfp.S
  2. +14
    -6
      kernel/arm/dtrmm_kernel_4x2_vfp.S
  3. +14
    -6
      kernel/arm/strmm_kernel_4x2_vfp.S
  4. +30
    -22
      kernel/arm/ztrmm_kernel_2x2_vfp.S

+ 31
- 22
kernel/arm/ctrmm_kernel_2x2_vfp.S View File

@@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N [fp, #-260 ]
#define K [fp, #-264 ]

// Two-word stack scratch area used as a floating-point zero constant.
// FP_ZERO and FP_ZERO_0 name the same word at fp-232; FP_ZERO_1 is the
// following word at fp-228. The kernel entry code stores integer 0 into
// FP_ZERO and FP_ZERO_1, and the macros read the zero back with
// "flds sN, FP_ZERO" (first word only).
#define FP_ZERO [fp, #-232]
#define FP_ZERO_0 [fp, #-232]
#define FP_ZERO_1 [fp, #-228]


#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]

@@ -136,7 +141,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -301,10 +306,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
vsub.f32 s6, s6, s6
vsub.f32 s7, s7, s7
flds s4, FP_ZERO
vmov.f32 s5, s4
vmov.f32 s6, s4
vmov.f32 s7, s4

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -318,10 +323,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

fstmias CO1, { s4 - s7 }

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
vsub.f32 s6, s6, s6
vsub.f32 s7, s7, s7
flds s4, FP_ZERO
vmov.f32 s5, s4
vmov.f32 s6, s4
vmov.f32 s7, s4

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
@@ -343,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s12, s8
vmov.f32 s13, s8
@@ -490,8 +495,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
flds s4, FP_ZERO
vmov.f32 s5, s4

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -500,8 +505,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

fstmias CO1, { s4 - s5 }

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
flds s4, FP_ZERO
vmov.f32 s5, s4

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
@@ -519,7 +524,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -663,10 +668,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
vsub.f32 s6, s6, s6
vsub.f32 s7, s7, s7
flds s4, FP_ZERO
vmov.f32 s5, s4
vmov.f32 s6, s4
vmov.f32 s7, s4

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -689,7 +694,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// INIT1x1: set s8 and s9 to zero.
// The zero is loaded from the FP_ZERO stack slot (filled with 0 by the
// kernel entry code). The former "vsub.f32 s8, s8, s8" was removed: its
// result was overwritten by the load right after it, and x - x is not a
// reliable zero because it yields NaN when the stale register holds NaN
// or Inf.
.macro INIT1x1

	flds		s8 , FP_ZERO		// s8 = 0.0f from memory
	vmov.f32	s9 , s8			// s9 = s8

.endm
@@ -795,8 +800,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
flds s4, FP_ZERO
vmov.f32 s5, s4

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -831,6 +836,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { s8 - s15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #3 // ldc = ldc * 4 * 2
str r3, LDC


+ 14
- 6
kernel/arm/dtrmm_kernel_4x2_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define K [fp, #-264 ]
#define A [fp, #-268 ]

// Two-word stack scratch area used as a floating-point zero constant.
// FP_ZERO and FP_ZERO_0 name the same word at fp-232; FP_ZERO_1 is the
// following word at fp-228. The kernel entry code stores integer 0 into
// FP_ZERO and FP_ZERO_1, and the INIT macros read the zero back with
// "fldd d8, FP_ZERO".
#define FP_ZERO [fp, #-232]
#define FP_ZERO_0 [fp, #-232]
#define FP_ZERO_1 [fp, #-228]

#define ALPHA [fp, #-276 ]

#define B [fp, #4 ]
@@ -90,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9, d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -165,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9, d8
vmov.f64 d12, d8
vmov.f64 d13, d8
@@ -220,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// INIT1x2: set d8 and d12 to zero.
// The zero is loaded from the FP_ZERO stack slot (both words are filled
// with 0 by the kernel entry code). The former "vsub.f64 d8, d8, d8" was
// removed: its result was overwritten by the load right after it, and
// x - x yields NaN when the stale register holds NaN or Inf.
.macro INIT1x2

	fldd		d8 , FP_ZERO		// d8 = 0.0 from memory
	vmov.f64	d12, d8			// d12 = d8

.endm
@@ -268,7 +272,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x1

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9, d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -318,7 +322,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// INIT2x1: set d8 and d9 to zero.
// The zero is loaded from the FP_ZERO stack slot (both words are filled
// with 0 by the kernel entry code). The former "vsub.f64 d8, d8, d8" was
// removed: its result was overwritten by the load right after it, and
// x - x yields NaN when the stale register holds NaN or Inf.
.macro INIT2x1

	fldd		d8 , FP_ZERO		// d8 = 0.0 from memory
	vmov.f64	d9 , d8			// d9 = d8

.endm
@@ -357,7 +361,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// INIT1x1: set d8 to zero.
// The zero is loaded from the FP_ZERO stack slot (both words are filled
// with 0 by the kernel entry code). The former "vsub.f64 d8, d8, d8" was
// removed: its result was overwritten by the load right after it, and
// x - x yields NaN when the stale register holds NaN or Inf.
.macro INIT1x1

	fldd		d8 , FP_ZERO		// d8 = 0.0 from memory

.endm

@@ -409,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { d8 - d15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #3 // ldc = ldc * 8
str r3, LDC


+ 14
- 6
kernel/arm/strmm_kernel_4x2_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define K [fp, #-264 ]
#define A [fp, #-268 ]

// Two-word stack scratch area used as a floating-point zero constant.
// FP_ZERO and FP_ZERO_0 name the same word at fp-232; FP_ZERO_1 is the
// following word at fp-228. The kernel entry code stores integer 0 into
// FP_ZERO and FP_ZERO_1, and the INIT macros read the zero back with
// "flds s8, FP_ZERO" (first word only).
#define FP_ZERO [fp, #-232]
#define FP_ZERO_0 [fp, #-232]
#define FP_ZERO_1 [fp, #-228]

#define ALPHA [fp, #-276 ]

#define B [fp, #4 ]
@@ -90,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9, s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -156,7 +160,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9, s8
vmov.f32 s12, s8
vmov.f32 s13, s8
@@ -211,7 +215,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// INIT1x2: set s8 and s12 to zero.
// The zero is loaded from the FP_ZERO stack slot (filled with 0 by the
// kernel entry code). The former "vsub.f32 s8, s8, s8" was removed: its
// result was overwritten by the load right after it, and x - x yields
// NaN when the stale register holds NaN or Inf.
.macro INIT1x2

	flds		s8 , FP_ZERO		// s8 = 0.0f from memory
	vmov.f32	s12, s8			// s12 = s8

.endm
@@ -259,7 +263,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9, s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -309,7 +313,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// INIT2x1: set s8 and s9 to zero.
// The zero is loaded from the FP_ZERO stack slot (filled with 0 by the
// kernel entry code). The former "vsub.f32 s8, s8, s8" was removed: its
// result was overwritten by the load right after it, and x - x yields
// NaN when the stale register holds NaN or Inf.
.macro INIT2x1

	flds		s8 , FP_ZERO		// s8 = 0.0f from memory
	vmov.f32	s9 , s8			// s9 = s8

.endm
@@ -348,7 +352,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// INIT1x1: set s8 to zero.
// The zero is loaded from the FP_ZERO stack slot (filled with 0 by the
// kernel entry code). The former "vsub.f32 s8, s8, s8" was removed: its
// result was overwritten by the load right after it, and x - x yields
// NaN when the stale register holds NaN or Inf.
.macro INIT1x1

	flds		s8 , FP_ZERO		// s8 = 0.0f from memory

.endm

@@ -400,6 +404,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { s8 - s15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #2 // ldc = ldc * 4
str r3, LDC


+ 30
- 22
kernel/arm/ztrmm_kernel_2x2_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N [fp, #-260 ]
#define K [fp, #-264 ]

// Two-word stack scratch area used as a floating-point zero constant.
// FP_ZERO and FP_ZERO_0 name the same word at fp-232; FP_ZERO_1 is the
// following word at fp-228. The kernel entry code stores integer 0 into
// FP_ZERO and FP_ZERO_1, and the macros read the zero back with
// "fldd dN, FP_ZERO".
#define FP_ZERO [fp, #-232]
#define FP_ZERO_0 [fp, #-232]
#define FP_ZERO_1 [fp, #-228]

#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]

@@ -140,7 +144,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -356,10 +360,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
vsub.f64 d6, d6 , d6
vsub.f64 d7, d7 , d7
fldd d4 , FP_ZERO
vmov.f64 d5 , d4
vmov.f64 d6 , d4
vmov.f64 d7 , d4

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -373,10 +377,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

fstmiad CO1, { d4 - d7 }

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
vsub.f64 d6, d6 , d6
vsub.f64 d7, d7 , d7
fldd d4 , FP_ZERO
vmov.f64 d5 , d4
vmov.f64 d6 , d4
vmov.f64 d7 , d4

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
@@ -398,7 +402,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d12, d8
vmov.f64 d13, d8
@@ -545,8 +549,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
fldd d4 , FP_ZERO
vmov.f64 d5 , d4

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -555,8 +559,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

fstmiad CO1, { d4 - d5 }

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
fldd d4 , FP_ZERO
vmov.f64 d5 , d4

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
@@ -574,7 +578,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -718,10 +722,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
vsub.f64 d6, d6 , d6
vsub.f64 d7, d7 , d7
fldd d4 , FP_ZERO
vmov.f64 d5 , d4
vmov.f64 d6 , d4
vmov.f64 d7 , d4

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -744,7 +748,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// INIT1x1: set d8 and d9 to zero.
// The zero is loaded from the FP_ZERO stack slot (both words are filled
// with 0 by the kernel entry code). The former "vsub.f64 d8, d8, d8" was
// removed: its result was overwritten by the load right after it, and
// x - x yields NaN when the stale register holds NaN or Inf.
.macro INIT1x1

	fldd		d8 , FP_ZERO		// d8 = 0.0 from memory
	vmov.f64	d9 , d8			// d9 = d8

.endm
@@ -850,8 +854,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
fldd d4 , FP_ZERO
vmov.f64 d5 , d4

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -888,6 +892,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { d8 - d15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #4 // ldc = ldc * 8 * 2
str r3, LDC


Loading…
Cancel
Save