Browse Source

Ref #380: lowered stack usage for piledriver and bulldozer kernels

tags/v0.2.10.rc1^2
wernsaar 11 years ago
parent
commit
73545a79cd
8 changed files with 12 additions and 19 deletions
  1. +1
    -2
      kernel/x86_64/cgemm_kernel_4x2_bulldozer.S
  2. +1
    -3
      kernel/x86_64/cgemm_kernel_4x2_piledriver.S
  3. +2
    -2
      kernel/x86_64/dgemm_kernel_8x2_bulldozer.S
  4. +2
    -2
      kernel/x86_64/dgemm_kernel_8x2_piledriver.S
  5. +2
    -2
      kernel/x86_64/sgemm_kernel_16x2_bulldozer.S
  6. +2
    -2
      kernel/x86_64/sgemm_kernel_16x2_piledriver.S
  7. +1
    -3
      kernel/x86_64/zgemm_kernel_2x2_bulldozer.S
  8. +1
    -3
      kernel/x86_64/zgemm_kernel_2x2_piledriver.S

+ 1
- 2
kernel/x86_64/cgemm_kernel_4x2_bulldozer.S View File

@@ -79,8 +79,7 @@
 #endif
-#define L_BUFFER_SIZE 512*8*4
-#define LB2_OFFSET 512*8*2
+#define L_BUFFER_SIZE 8192
 #define Ndiv6 24(%rsp)
 #define Nmod6 32(%rsp)


+ 1
- 3
kernel/x86_64/cgemm_kernel_4x2_piledriver.S View File

@@ -104,8 +104,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
-#define L_BUFFER_SIZE 512*8*4
-#define LB2_OFFSET 512*8*2
+#define L_BUFFER_SIZE 256*8*4
 #define Ndiv6 24(%rsp)
 #define Nmod6 32(%rsp)
@@ -116,7 +115,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define KK 72(%rsp)
 #define KKK 80(%rsp)
 #define BUFFER1 128(%rsp)
-#define BUFFER2 LB2_OFFSET+128(%rsp)
 #if defined(OS_WINDOWS)
 #if L_BUFFER_SIZE > 16384


+ 2
- 2
kernel/x86_64/dgemm_kernel_8x2_bulldozer.S View File

@@ -148,8 +148,8 @@
 #endif
-#define L_BUFFER_SIZE 512*8*4
-#define LB2_OFFSET 512*8*2
+#define L_BUFFER_SIZE 8192
+#define LB2_OFFSET 4096
 #define Ndiv6 24(%rsp)
 #define Nmod6 32(%rsp)


+ 2
- 2
kernel/x86_64/dgemm_kernel_8x2_piledriver.S View File

@@ -105,8 +105,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
-#define L_BUFFER_SIZE 512*8*4
-#define LB2_OFFSET 512*8*2
+#define L_BUFFER_SIZE 8192
+#define LB2_OFFSET 4096
 #define Ndiv6 24(%rsp)
 #define Nmod6 32(%rsp)


+ 2
- 2
kernel/x86_64/sgemm_kernel_16x2_bulldozer.S View File

@@ -78,8 +78,8 @@
 #endif
-#define L_BUFFER_SIZE 512*8*4
-#define LB2_OFFSET 512*8*2
+#define L_BUFFER_SIZE 8192
+#define LB2_OFFSET 4096
 #define Ndiv6 24(%rsp)
 #define Nmod6 32(%rsp)


+ 2
- 2
kernel/x86_64/sgemm_kernel_16x2_piledriver.S View File

@@ -105,8 +105,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
-#define L_BUFFER_SIZE 512*8*4
-#define LB2_OFFSET 512*8*2
+#define L_BUFFER_SIZE 8192
+#define LB2_OFFSET 4096
 #define Ndiv6 24(%rsp)
 #define Nmod6 32(%rsp)


+ 1
- 3
kernel/x86_64/zgemm_kernel_2x2_bulldozer.S View File

@@ -79,8 +79,7 @@
 #endif
-#define L_BUFFER_SIZE 512*8*4
-#define LB2_OFFSET 512*8*2
+#define L_BUFFER_SIZE 8192
 #define Ndiv6 24(%rsp)
 #define Nmod6 32(%rsp)
@@ -91,7 +90,6 @@
 #define KK 72(%rsp)
 #define KKK 80(%rsp)
 #define BUFFER1 128(%rsp)
-#define BUFFER2 LB2_OFFSET+128(%rsp)
 #if defined(OS_WINDOWS)
 #if L_BUFFER_SIZE > 16384


+ 1
- 3
kernel/x86_64/zgemm_kernel_2x2_piledriver.S View File

@@ -104,8 +104,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
-#define L_BUFFER_SIZE 512*8*4
-#define LB2_OFFSET 512*8*2
+#define L_BUFFER_SIZE 256*8*4
 #define Ndiv6 24(%rsp)
 #define Nmod6 32(%rsp)
@@ -116,7 +115,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define KK 72(%rsp)
 #define KKK 80(%rsp)
 #define BUFFER1 128(%rsp)
-#define BUFFER2 LB2_OFFSET+128(%rsp)
 #if defined(OS_WINDOWS)
 #if L_BUFFER_SIZE > 16384


Loading…
Cancel
Save