Browse Source

Fix a build issue for PRESCOTT on x86_64

Define everything the same way for PRESCOTT as for PENTIUM4.

Signed-off-by: Zoltán Böszörményi <zboszor@gmail.com>
pull/4650/head
Zoltán Böszörményi 1 year ago
parent
commit
461069451f
30 changed files with 46 additions and 46 deletions
  1. +1
    -1
      common_x86.h
  2. +1
    -1
      kernel/x86_64/dgemm_ncopy_4.S
  3. +1
    -1
      kernel/x86_64/dgemm_tcopy_2.S
  4. +1
    -1
      kernel/x86_64/dgemm_tcopy_4.S
  5. +4
    -4
      kernel/x86_64/gemm_kernel_8x4_sse.S
  6. +1
    -1
      kernel/x86_64/gemm_ncopy_4.S
  7. +4
    -4
      kernel/x86_64/gemm_tcopy_4.S
  8. +1
    -1
      kernel/x86_64/izamax_sse2.S
  9. +1
    -1
      kernel/x86_64/symv_L_sse.S
  10. +1
    -1
      kernel/x86_64/symv_L_sse2.S
  11. +1
    -1
      kernel/x86_64/symv_U_sse.S
  12. +1
    -1
      kernel/x86_64/symv_U_sse2.S
  13. +1
    -1
      kernel/x86_64/trsm_kernel_LN_8x4_sse.S
  14. +1
    -1
      kernel/x86_64/trsm_kernel_LT_8x4_sse.S
  15. +1
    -1
      kernel/x86_64/trsm_kernel_RT_8x4_sse.S
  16. +1
    -1
      kernel/x86_64/zasum_sse2.S
  17. +4
    -4
      kernel/x86_64/zgemm3m_kernel_8x4_sse.S
  18. +2
    -2
      kernel/x86_64/zgemm_kernel_4x2_sse.S
  19. +4
    -4
      kernel/x86_64/zgemm_ncopy_2.S
  20. +4
    -4
      kernel/x86_64/zgemm_tcopy_2.S
  21. +1
    -1
      kernel/x86_64/zsum_sse2.S
  22. +1
    -1
      kernel/x86_64/zsymv_L_sse.S
  23. +1
    -1
      kernel/x86_64/zsymv_L_sse2.S
  24. +1
    -1
      kernel/x86_64/zsymv_U_sse.S
  25. +1
    -1
      kernel/x86_64/zsymv_U_sse2.S
  26. +1
    -1
      kernel/x86_64/ztrsm_kernel_LN_4x2_sse.S
  27. +1
    -1
      kernel/x86_64/ztrsm_kernel_LT_4x2_sse.S
  28. +1
    -1
      kernel/x86_64/ztrsm_kernel_RT_4x2_sse.S
  29. +1
    -1
      l1param.h
  30. +1
    -1
      l2param.h

+ 1
- 1
common_x86.h View File

@@ -229,7 +229,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
#define EMMS
#endif

-#if defined(CORE2) || defined(PENTIUM4)
+#if defined(CORE2) || defined(PENTIUM4) || defined(PRESCOTT)
#define movapd movaps
#endif



+ 1
- 1
kernel/x86_64/dgemm_ncopy_4.S View File

@@ -39,7 +39,7 @@
#define ASSEMBLER
#include "common.h"

-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
#define PREFETCHSIZE 16
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0


+ 1
- 1
kernel/x86_64/dgemm_tcopy_2.S View File

@@ -39,7 +39,7 @@
#define ASSEMBLER
#include "common.h"

-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
#define PREFETCHSIZE 16
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0


+ 1
- 1
kernel/x86_64/dgemm_tcopy_4.S View File

@@ -39,7 +39,7 @@
#define ASSEMBLER
#include "common.h"

-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
#define PREFETCHSIZE 16
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0


+ 4
- 4
kernel/x86_64/gemm_kernel_8x4_sse.S View File

@@ -1713,7 +1713,7 @@
ALIGN_4

.L52:
-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
movss 0 * SIZE(B), %xmm0
movss 1 * SIZE(B), %xmm1
movss 2 * SIZE(B), %xmm2
@@ -1801,7 +1801,7 @@
ALIGN_4

.L54:
-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
movss 0 * SIZE(B), %xmm0
movss 1 * SIZE(B), %xmm1

@@ -2689,7 +2689,7 @@


.L102:
-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
movss 0 * SIZE(B), %xmm0
movss 1 * SIZE(B), %xmm1
movss 2 * SIZE(B), %xmm2
@@ -2777,7 +2777,7 @@
ALIGN_4

.L104:
-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
movss 0 * SIZE(B), %xmm0
shufps $0, %xmm0, %xmm0
movaps %xmm0, 0 * SIZE(BO)


+ 1
- 1
kernel/x86_64/gemm_ncopy_4.S View File

@@ -39,7 +39,7 @@
#define ASSEMBLER
#include "common.h"

-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
#define RPREFETCHSIZE 16
#define WPREFETCHSIZE (RPREFETCHSIZE * 4)
#define PREFETCH prefetcht0


+ 4
- 4
kernel/x86_64/gemm_tcopy_4.S View File

@@ -39,7 +39,7 @@
#define ASSEMBLER
#include "common.h"

-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
#define RPREFETCHSIZE 16
#define WPREFETCHSIZE (RPREFETCHSIZE * 4)
#define PREFETCH prefetcht0
@@ -204,7 +204,7 @@
movlps 0 * SIZE(AO4), %xmm3
movhps 2 * SIZE(AO4), %xmm3

-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
PREFETCH RPREFETCHSIZE * SIZE(AO1)
PREFETCH RPREFETCHSIZE * SIZE(AO2)
PREFETCH RPREFETCHSIZE * SIZE(AO3)
@@ -362,7 +362,7 @@
movlps 0 * SIZE(AO2), %xmm1
movhps 2 * SIZE(AO2), %xmm1

-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
PREFETCH RPREFETCHSIZE * SIZE(AO1)
PREFETCH RPREFETCHSIZE * SIZE(AO2)
PREFETCHW WPREFETCHSIZE * SIZE(BO)
@@ -381,7 +381,7 @@
movsd 2 * SIZE(AO2), %xmm3
movhpd 3 * SIZE(AO2), %xmm3

-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
PREFETCH RPREFETCHSIZE * SIZE(AO1)
PREFETCH RPREFETCHSIZE * SIZE(AO2)
PREFETCHW WPREFETCHSIZE * SIZE(BO)


+ 1
- 1
kernel/x86_64/izamax_sse2.S View File

@@ -473,7 +473,7 @@
prefetch PREFETCHSIZE * SIZE(X)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
prefetchnta PREFETCHSIZE * SIZE(X)
#endif



+ 1
- 1
kernel/x86_64/symv_L_sse.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 20)


+ 1
- 1
kernel/x86_64/symv_L_sse2.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 20)


+ 1
- 1
kernel/x86_64/symv_U_sse.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 20)


+ 1
- 1
kernel/x86_64/symv_U_sse2.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 20)


+ 1
- 1
kernel/x86_64/trsm_kernel_LN_8x4_sse.S View File

@@ -81,7 +81,7 @@
#define BORIG 48(%rsp)
#define BUFFER 128(%rsp)

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#endif


+ 1
- 1
kernel/x86_64/trsm_kernel_LT_8x4_sse.S View File

@@ -81,7 +81,7 @@
#define BORIG 48(%rsp)
#define BUFFER 128(%rsp)

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#endif


+ 1
- 1
kernel/x86_64/trsm_kernel_RT_8x4_sse.S View File

@@ -81,7 +81,7 @@
#define BORIG 48(%rsp)
#define BUFFER 128(%rsp)

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#endif


+ 1
- 1
kernel/x86_64/zasum_sse2.S View File

@@ -249,7 +249,7 @@
prefetcht0 PREFETCHSIZE * SIZE(X)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
prefetchnta PREFETCHSIZE * SIZE(X)
#endif



+ 4
- 4
kernel/x86_64/zgemm3m_kernel_8x4_sse.S View File

@@ -1792,7 +1792,7 @@
ALIGN_4

.L52:
-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
movss 0 * SIZE(B), %xmm0
movss 1 * SIZE(B), %xmm1
movss 2 * SIZE(B), %xmm2
@@ -1880,7 +1880,7 @@
ALIGN_4

.L54:
-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
movss 0 * SIZE(B), %xmm0
movss 1 * SIZE(B), %xmm1

@@ -2763,7 +2763,7 @@


.L102:
-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
movss 0 * SIZE(B), %xmm0
movss 1 * SIZE(B), %xmm1
movss 2 * SIZE(B), %xmm2
@@ -2851,7 +2851,7 @@
ALIGN_4

.L104:
-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
movss 0 * SIZE(B), %xmm0
shufps $0, %xmm0, %xmm0
movaps %xmm0, 0 * SIZE(BO)


+ 2
- 2
kernel/x86_64/zgemm_kernel_4x2_sse.S View File

@@ -93,7 +93,7 @@
#define PREFETCHSIZE (16 * 5 + 8)
#endif

-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE 160
@@ -1573,7 +1573,7 @@
movaps %xmm14, 24 * SIZE(BO)
movaps %xmm15, 28 * SIZE(BO)

-#if defined(PENTIUM4) || defined(GENERIC)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC)
PREFETCHW 128 * SIZE(BO)
PREFETCH 112 * SIZE(B)
#endif


+ 4
- 4
kernel/x86_64/zgemm_ncopy_2.S View File

@@ -74,7 +74,7 @@
#define WPREFETCHSIZE 48
#endif

-#if defined(PENTIUM4) || defined(GENERIC) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC) || defined(NANO)
#define RPREFETCHSIZE 32
#define WPREFETCHSIZE 80
#endif
@@ -150,7 +150,7 @@
movlps 6 * SIZE(AO1), %xmm3
movhps 6 * SIZE(AO2), %xmm3

-#if defined(PENTIUM4) || defined(GENERIC) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC) || defined(NANO)
prefetcht0 RPREFETCHSIZE * SIZE(AO1)
prefetcht0 RPREFETCHSIZE * SIZE(AO2)

@@ -191,7 +191,7 @@
movsd 6 * SIZE(AO2), %xmm7
movhpd 7 * SIZE(AO2), %xmm7

-#if defined(PENTIUM4) || defined(GENERIC) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC) || defined(NANO)
prefetcht0 RPREFETCHSIZE * SIZE(AO1)
prefetcht0 RPREFETCHSIZE * SIZE(AO2)

@@ -295,7 +295,7 @@
movapd %xmm3, 6 * SIZE(B)
#endif

-#if defined(PENTIUM4) || defined(GENERIC) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC) || defined(NANO)
prefetcht0 RPREFETCHSIZE * SIZE(AO1)
prefetcht0 RPREFETCHSIZE * SIZE(AO2)



+ 4
- 4
kernel/x86_64/zgemm_tcopy_2.S View File

@@ -153,7 +153,7 @@
movlps 4 * SIZE(AO2), %xmm3
movhps 6 * SIZE(AO2), %xmm3

-#if defined(PENTIUM4) || defined(GENERIC) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC) || defined(NANO)
prefetcht0 RPREFETCHSIZE * SIZE(AO1)
prefetcht0 RPREFETCHSIZE * SIZE(AO2)
prefetcht0 WPREFETCHSIZE * SIZE(BO)
@@ -197,7 +197,7 @@
movsd 6 * SIZE(AO2), %xmm7
movhpd 7 * SIZE(AO2), %xmm7

-#if defined(PENTIUM4) || defined(GENERIC) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC) || defined(NANO)
prefetcht0 RPREFETCHSIZE * SIZE(AO1)
prefetcht0 RPREFETCHSIZE * SIZE(AO2)
prefetcht0 WPREFETCHSIZE * SIZE(BO)
@@ -316,7 +316,7 @@
movlps 4 * SIZE(AO1), %xmm1
movhps 6 * SIZE(AO1), %xmm1

-#if defined(PENTIUM4) || defined(GENERIC) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC) || defined(NANO)
prefetcht0 RPREFETCHSIZE * SIZE(AO1)
prefetcht0 WPREFETCHSIZE * SIZE(BO)
#endif
@@ -339,7 +339,7 @@
movsd 6 * SIZE(AO1), %xmm3
movhpd 7 * SIZE(AO1), %xmm3

-#if defined(PENTIUM4) || defined(GENERIC) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(GENERIC) || defined(NANO)
prefetcht0 RPREFETCHSIZE * SIZE(AO1)
prefetcht0 WPREFETCHSIZE * SIZE(BO)
#endif


+ 1
- 1
kernel/x86_64/zsum_sse2.S View File

@@ -219,7 +219,7 @@
prefetcht0 PREFETCHSIZE * SIZE(X)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
prefetchnta PREFETCHSIZE * SIZE(X)
#endif



+ 1
- 1
kernel/x86_64/zsymv_L_sse.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 28)


+ 1
- 1
kernel/x86_64/zsymv_L_sse2.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 28)


+ 1
- 1
kernel/x86_64/zsymv_U_sse.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 28)


+ 1
- 1
kernel/x86_64/zsymv_U_sse2.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 28)


+ 1
- 1
kernel/x86_64/ztrsm_kernel_LN_4x2_sse.S View File

@@ -88,7 +88,7 @@
#define movsd movlps
#endif

-#if defined(PENTIUM4) || defined(CORE2) || defined(PENRYN) || defined(DUNNINGTON) || defined(ATOM) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(CORE2) || defined(PENRYN) || defined(DUNNINGTON) || defined(ATOM) || defined(NANO)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHNTA prefetchnta


+ 1
- 1
kernel/x86_64/ztrsm_kernel_LT_4x2_sse.S View File

@@ -88,7 +88,7 @@
#define movsd movlps
#endif

-#if defined(PENTIUM4) || defined(CORE2) || defined(PENRYN) || defined(DUNNINGTON) || defined(ATOM) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(CORE2) || defined(PENRYN) || defined(DUNNINGTON) || defined(ATOM) || defined(NANO)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHNTA prefetchnta


+ 1
- 1
kernel/x86_64/ztrsm_kernel_RT_4x2_sse.S View File

@@ -88,7 +88,7 @@
#define movsd movlps
#endif

-#if defined(PENTIUM4) || defined(CORE2) || defined(PENRYN) || defined(DUNNINGTON) || defined(ATOM) || defined(NANO)
+#if defined(PENTIUM4) || defined(PRESCOTT) || defined(CORE2) || defined(PENRYN) || defined(DUNNINGTON) || defined(ATOM) || defined(NANO)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHNTA prefetchnta


+ 1
- 1
l1param.h View File

@@ -31,7 +31,7 @@
#define movsd movlps
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (128 * 10)
#define FETCH128


+ 1
- 1
l2param.h View File

@@ -19,7 +19,7 @@
#define PREFETCHSIZE 64 * 3
#endif

-#ifdef PENTIUM4
+#if defined(PENTIUM4) || defined(PRESCOTT)
#define ALIGNED_ACCESS
#define MOVUPS_A movaps
#define MOVUPS_XL movaps


Loading…
Cancel
Save