Browse Source

Refs #173. Fixed an internal buffer overflow bug in gemv_n on x86

tags/v0.2.6
Zhang Xianyi 13 years ago
parent
commit
69200884e1
2 changed files with 112 additions and 23 deletions
  1. +55
    -12
      kernel/x86/gemv_n_sse.S
  2. +57
    -11
      kernel/x86/gemv_n_sse2.S

+ 55
- 12
kernel/x86/gemv_n_sse.S View File

@@ -89,17 +89,22 @@
#endif

/* Stack-frame layout. Every macro below expands to an %esp-relative
   operand (or a register alias) used by the kernel code.
   STACKSIZE is the distance in bytes from %esp to the scratch area once
   the prologue has run; presumably the four 4-byte register pushes --
   confirm against the full prologue. */
#define STACKSIZE 16

/* Argument operands computed without the ARGS scratch area.
   NOTE(review): the same ten names are defined again further down with
   ARGS added; these ten lines look like the pre-change side of the hunk. */
#define M 4 + STACKSIZE(%esp)
#define N 8 + STACKSIZE(%esp)
#define ALPHA 16 + STACKSIZE(%esp)
#define A 20 + STACKSIZE(%esp)
#define STACK_LDA 24 + STACKSIZE(%esp)
#define STACK_X 28 + STACKSIZE(%esp)
#define STACK_INCX 32 + STACKSIZE(%esp)
#define Y 36 + STACKSIZE(%esp)
#define STACK_INCY 40 + STACKSIZE(%esp)
#define BUFFER 44 + STACKSIZE(%esp)
/* Size in bytes of the scratch area reserved by subl $ARGS,%esp in the
   prologue and released by addl $ARGS,%esp before ret. */
#define ARGS 16

/* Argument operands shifted by ARGS so that they still address the
   caller arguments after the extra stack reservation. */
#define M 4 + STACKSIZE+ARGS(%esp)
#define N 8 + STACKSIZE+ARGS(%esp)
#define ALPHA 16 + STACKSIZE+ARGS(%esp)
#define A 20 + STACKSIZE+ARGS(%esp)
#define STACK_LDA 24 + STACKSIZE+ARGS(%esp)
#define STACK_X 28 + STACKSIZE+ARGS(%esp)
#define STACK_INCX 32 + STACKSIZE+ARGS(%esp)
#define Y 36 + STACKSIZE+ARGS(%esp)
#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
#define BUFFER 44 + STACKSIZE+ARGS(%esp)
/* Scratch slots used by the row-blocking code: MMM holds the rows still
   to be processed, YY and AA hold the Y and A pointers for the current
   block. LDAX is not referenced in the code visible here.
   NOTE(review): the displacements are written in terms of ARGS, although
   the slots sit directly above the pushed registers, where STACKSIZE is
   the natural base. This is correct today because both constants are 16;
   confirm before changing either one. */
#define MMM 0+ARGS(%esp)
#define YY 4+ARGS(%esp)
#define AA 8+ARGS(%esp)
#define LDAX 12+ARGS(%esp)
/* Register aliases for the scratch and counter registers. */
#define I %eax
#define J %ebx
@@ -114,6 +119,7 @@

/* Function entry: reserve the scratch area first, then save the
   registers that the epilogue restores. */
PROLOGUE

subl $ARGS,%esp # scratch area for the blocking state, released before ret
pushl %ebp
pushl %edi
pushl %esi
@@ -121,7 +127,34 @@

PROFCODE

/* Row blocking: the kernel body is run on at most 1 << 21 rows per pass.
   MMM counts the rows not yet processed, AA and YY track the A and Y
   pointers for the current pass, and the M, A and Y argument slots are
   rewritten before each pass so the code below sees one block at a time.
   Presumably the block size keeps the internal buffer from overflowing
   (per the commit title) -- confirm against the buffer allocation. */
movl Y,J
movl J,YY # backup Y
movl A,J
movl J,AA # backup A
movl M,J
movl J,MMM # backup M
.L0t:
xorl J,J
addl $1,J
sall $21,J # J = 1 << 21, the block size in rows
subl J,MMM # rows left after this pass; sets the flags tested by jge
movl J,M # assume a full block; movl leaves the flags untouched
jge .L00t # signed result >= 0: a full block is available
/* NOTE(review): presumably an alignment directive; it sits on the
   fall-through path of the jge above, so any padding is executed. */
ALIGN_4

movl MMM,%eax
addl J,%eax # eax = rows that were left before the subtraction
jle .L999x # nothing left: restore registers and return
movl %eax,M # final short block

.L00t:
movl AA,%eax
movl %eax,A # A argument slot = start of this block

movl YY,J
movl J,Y # Y argument slot = start of this block
movl STACK_LDA, LDA

movl STACK_X, X
movl STACK_INCX, INCX

@@ -651,12 +684,22 @@
addss 0 * SIZE(X), %xmm0
movss %xmm0, (Y1)
ALIGN_3

/* End of one row block: step the saved A and Y pointers past the M rows
   just processed, then jump back to .L0t for the next block.
   A is advanced by M elements. Y is advanced by M * incy elements,
   because consecutive Y elements are incy apart; stepping Y by M * SIZE
   bytes alone is only right for incy == 1.
   NOTE(review): assumes the STACK_INCY slot still holds the element
   stride passed by the caller at this point -- confirm nothing in the
   kernel body overwrites it. Only J, %eax and the flags are clobbered;
   .L0t reloads J and %eax before using them. */
.L999:
movl M,J
leal (,J,SIZE),%eax # eax = M * SIZE bytes
addl %eax,AA # next block of A starts M elements further on
imull STACK_INCY,%eax # eax = M * SIZE * incy bytes (signed)
addl %eax,YY # next block of Y starts M strided elements further on
jmp .L0t
ALIGN_4

/* All rows processed: restore the saved registers in reverse push order,
   release the ARGS scratch area and return. */
.L999x:
popl %ebx
popl %esi
popl %edi
popl %ebp
addl $ARGS,%esp
ret

EPILOGUE

+ 57
- 11
kernel/x86/gemv_n_sse2.S View File

@@ -76,17 +76,22 @@
#endif

/* Stack-frame layout. Every macro below expands to an %esp-relative
   operand (or a register alias) used by the kernel code.
   STACKSIZE is the distance in bytes from %esp to the scratch area once
   the prologue has run; presumably the four 4-byte register pushes --
   confirm against the full prologue. */
#define STACKSIZE 16

/* Argument operands computed without the ARGS scratch area.
   NOTE(review): the same ten names are defined again further down with
   ARGS added; these ten lines look like the pre-change side of the hunk.
   ALPHA to A spans 8 bytes here, presumably a double-precision alpha. */
#define M 4 + STACKSIZE(%esp)
#define N 8 + STACKSIZE(%esp)
#define ALPHA 16 + STACKSIZE(%esp)
#define A 24 + STACKSIZE(%esp)
#define STACK_LDA 28 + STACKSIZE(%esp)
#define STACK_X 32 + STACKSIZE(%esp)
#define STACK_INCX 36 + STACKSIZE(%esp)
#define Y 40 + STACKSIZE(%esp)
#define STACK_INCY 44 + STACKSIZE(%esp)
#define BUFFER 48 + STACKSIZE(%esp)
/* Size in bytes of the scratch area reserved by subl $ARGS,%esp in the
   prologue and released by addl $ARGS,%esp before ret. */
#define ARGS 16

/* Argument operands shifted by ARGS so that they still address the
   caller arguments after the extra stack reservation. */
#define M 4 + STACKSIZE+ARGS(%esp)
#define N 8 + STACKSIZE+ARGS(%esp)
#define ALPHA 16 + STACKSIZE+ARGS(%esp)
#define A 24 + STACKSIZE+ARGS(%esp)
#define STACK_LDA 28 + STACKSIZE+ARGS(%esp)
#define STACK_X 32 + STACKSIZE+ARGS(%esp)
#define STACK_INCX 36 + STACKSIZE+ARGS(%esp)
#define Y 40 + STACKSIZE+ARGS(%esp)
#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
#define BUFFER 48 + STACKSIZE+ARGS(%esp)

/* Scratch slots used by the row-blocking code: MMM holds the rows still
   to be processed, YY and AA hold the Y and A pointers for the current
   block.
   NOTE(review): the displacements are written in terms of ARGS, although
   the slots sit directly above the pushed registers, where STACKSIZE is
   the natural base. This is correct today because both constants are 16;
   confirm before changing either one. */
#define MMM 0+ARGS(%esp)
#define YY 4+ARGS(%esp)
#define AA 8+ARGS(%esp)
/* Register aliases for the scratch and counter registers. */
#define I %eax
#define J %ebx
@@ -101,6 +106,8 @@

/* Function entry: reserve the scratch area first, then save the
   registers that the epilogue restores. */
PROLOGUE


subl $ARGS,%esp # scratch area for the blocking state, released before ret
pushl %ebp
pushl %edi
pushl %esi
@@ -108,6 +115,33 @@

PROFCODE

/* Row blocking: the kernel body is run on at most 1 << 20 rows per pass.
   MMM counts the rows not yet processed, AA and YY track the A and Y
   pointers for the current pass, and the M, A and Y argument slots are
   rewritten before each pass so the code below sees one block at a time.
   Presumably the block size keeps the internal buffer from overflowing
   (per the commit title) -- confirm against the buffer allocation. */
movl Y,J
movl J,YY # backup Y
movl A,J
movl J,AA # backup A
movl M,J
movl J,MMM # backup M
.L0t:
xorl J,J
addl $1,J
sall $20,J # J = 1 << 20, the block size in rows
subl J,MMM # rows left after this pass; sets the flags tested by jge
movl J,M # assume a full block; movl leaves the flags untouched
jge .L00t # signed result >= 0: a full block is available
/* NOTE(review): presumably an alignment directive; it sits on the
   fall-through path of the jge above, so any padding is executed. */
ALIGN_4

movl MMM,%eax
addl J,%eax # eax = rows that were left before the subtraction
jle .L999x # nothing left: restore registers and return
movl %eax,M # final short block

.L00t:
movl AA,%eax
movl %eax,A # A argument slot = start of this block

movl YY,J
movl J,Y # Y argument slot = start of this block

movl STACK_LDA, LDA
movl STACK_X, X
movl STACK_INCX, INCX
@@ -677,10 +711,22 @@
ALIGN_3

/* End of one row block: step the saved A and Y pointers past the M rows
   just processed, then jump back to .L0t for the next block.
   A is advanced by M elements. Y is advanced by M * incy elements,
   because consecutive Y elements are incy apart; stepping Y by M * SIZE
   bytes alone is only right for incy == 1.
   NOTE(review): assumes the STACK_INCY slot still holds the element
   stride passed by the caller at this point -- confirm nothing in the
   kernel body overwrites it. Only J, %eax and the flags are clobbered;
   .L0t reloads J and %eax before using them. */
.L999:
movl M,J
leal (,J,SIZE),%eax # eax = M * SIZE bytes
addl %eax,AA # next block of A starts M elements further on
imull STACK_INCY,%eax # eax = M * SIZE * incy bytes (signed)
addl %eax,YY # next block of Y starts M strided elements further on
jmp .L0t
ALIGN_4

/* All rows processed: restore the saved registers in reverse push order,
   release the ARGS scratch area and return. */
.L999x:

popl %ebx
popl %esi
popl %edi
popl %ebp
addl $ARGS,%esp
ret

EPILOGUE

Loading…
Cancel
Save