Browse Source

Merge pull request #2357 from chenxuqiang/dgemm_beta_zero

kernel/arm64/dgemm_beta.S: add beta == zero branch
tags/v0.3.8^2
Martin Kroeker GitHub 6 years ago
parent
commit
456ee2e1f0
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 69 additions and 0 deletions
  1. +69
    -0
      kernel/arm64/dgemm_beta.S

+ 69
- 0
kernel/arm64/dgemm_beta.S View File

@@ -80,6 +80,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add sp, sp, #(11*16)
.endm

/*
 * INIT_ZERO
 *
 * Load +0.0 into both double-precision lanes of v0..v7 so they can be
 * used as the fill pattern for the beta == 0 path.
 *
 * The registers are cleared with an immediate move rather than being
 * multiplied by beta: v0..v7 contain whatever the caller left in them,
 * and under IEEE-754 0 * NaN and 0 * Inf are NaN (and 0 * negative is
 * -0.0), so a multiply could write garbage into C instead of zeros.
 *
 * Clobbers: v0-v7 (all 128 bits).
 */
.macro INIT_ZERO
	movi	v0.2d, #0
	movi	v1.2d, #0
	movi	v2.2d, #0
	movi	v3.2d, #0
	movi	v4.2d, #0
	movi	v5.2d, #0
	movi	v6.2d, #0
	movi	v7.2d, #0
.endm

/**************************************************************************************
* End of macro definitions
**************************************************************************************/
@@ -97,6 +108,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmp N, #0
ble .Lgemm_beta_L999

fcmp BETA, #0.0
beq .Lgemm_beta_zero_01

.Lgemm_beta_01:

lsl LDC, LDC, #3
@@ -180,4 +194,59 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
RESTORE_REGS
ret

/*
 * beta == 0 path: overwrite the M x N block at C00 with zeros, without
 * reading the previous contents of C.
 *
 * Register roles in this path:
 *   C00      start of the next column to fill (advanced by LDC each pass)
 *   LDC      column stride; converted below by a left shift of 3
 *   A01/A02  store cursors for the lower / upper 64 bytes of each block
 *   I        inner loop counter
 *   N        columns remaining
 *
 * NOTE(review): calc_size is defined outside this view; the two 64-byte
 * stores per iteration and the M >> 4 trip count imply it is 128 bytes
 * (16 doubles) -- confirm against the definition.
 */
.Lgemm_beta_zero_01:
	INIT_ZERO				// set up v0..v7 as the fill pattern
	lsl	LDC, LDC, #3			// LDC *= 8 (stride in 8-byte units -> bytes)

	.align 5
.Lgemm_beta_zero_02:				// ---- one pass per column ----
	mov	A01, C00			// A01 = start of current column
	add	C00, C00, LDC			// C00 = start of next column

	asr	I, M, #4			// I = M / 16 (number of full blocks)
	cmp	I, #0
	ble	.Lgemm_beta_zero_04		// no full block: go to remainder

	add	A02, A01, #64			// A02 covers the second half of a block

	.align 5
.Lgemm_beta_zero_03:				// store 16 doubles per iteration
	st1	{v0.2d, v1.2d, v2.2d, v3.2d}, [A01]
	add	A01, A01, calc_size
	st1	{v4.2d, v5.2d, v6.2d, v7.2d}, [A02]
	add	A02, A02, calc_size
	subs	I, I, #1
	bne	.Lgemm_beta_zero_03

	.align 5
.Lgemm_beta_zero_04:				// remainder: M % 16 elements

	and	I, M, #15
	cmp	I, #0
	ble	.Lgemm_beta_zero_06

	.align 5
.Lgemm_beta_zero_05:

	// Store the +0.0 bit pattern directly. fcmp against #0.0 also
	// reports -0.0 as equal, so storing the beta register here could
	// write -0.0 into C when beta is negative zero.
	str	xzr, [A01]
	add	A01, A01, #8

	subs	I, I, #1
	bne	.Lgemm_beta_zero_05

	.align 5
.Lgemm_beta_zero_06:

	subs	N, N, #1			// next column, if any remain
	bne	.Lgemm_beta_zero_02

	.align 5
.Lgemm_beta_zero_L999:

	mov	x0, #0				// return value 0
	RESTORE_REGS
	ret

EPILOGUE

Loading…
Cancel
Save