| @@ -310,6 +310,13 @@ typedef int blasint; | |||||
| #define YIELDING SwitchToThread() | #define YIELDING SwitchToThread() | ||||
| #endif | #endif | ||||
| /*************************************************** | |||||
| Some no-operations are enough | |||||
| ***************************************************/ | |||||
| #ifdef PILEDRIVER | |||||
| /* Yield by executing eight NOP instructions. No trailing ';' in the macro: call sites supply their own, exactly as they must for the SwitchToThread() and sched_yield() variants, so `if (c) YIELDING; else ...` stays a single statement. */ | |||||
| #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n") | |||||
| #endif | |||||
| #ifndef YIELDING | #ifndef YIELDING | ||||
| #define YIELDING sched_yield() | #define YIELDING sched_yield() | ||||
| #endif | #endif | ||||
| @@ -333,7 +333,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| for(jjs = js; jjs < js + min_j; jjs += min_jj){ | for(jjs = js; jjs < js + min_j; jjs += min_jj){ | ||||
| min_jj = min_j + js - jjs; | min_jj = min_j + js - jjs; | ||||
| #if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX) | |||||
| #if ( defined(BULLDOZER) || defined(PILEDRIVER) ) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX) | |||||
| if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N; | if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N; | ||||
| else | else | ||||
| if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
| @@ -367,7 +367,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){ | for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){ | ||||
| min_jj = MIN(n_to, xxx + div_n) - jjs; | min_jj = MIN(n_to, xxx + div_n) - jjs; | ||||
| #if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX) | |||||
| #if ( defined(BULLDOZER) || defined(PILEDRIVER) ) && defined(ARCH_X86_64) && !defined(XDOUBLE) && !defined(COMPLEX) | |||||
| if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N; | if (min_jj >= 12*GEMM_UNROLL_N) min_jj = 12*GEMM_UNROLL_N; | ||||
| else | else | ||||
| if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
| @@ -17,11 +17,11 @@ SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||
| DGEMMKERNEL = dgemm_kernel_6x4_piledriver.S | |||||
| DGEMMINCOPY = ../generic/gemm_ncopy_6.c | |||||
| DGEMMITCOPY = ../generic/gemm_tcopy_6.c | |||||
| DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| DGEMMKERNEL = dgemm_kernel_8x2_piledriver.S | |||||
| DGEMMINCOPY = dgemm_ncopy_8_bulldozer.S | |||||
| DGEMMITCOPY = dgemm_tcopy_8_bulldozer.S | |||||
| DGEMMONCOPY = gemm_ncopy_2_bulldozer.S | |||||
| DGEMMOTCOPY = gemm_tcopy_2_bulldozer.S | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | ||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||
| @@ -330,9 +330,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define XGEMM_DEFAULT_UNROLL_M 1 | #define XGEMM_DEFAULT_UNROLL_M 1 | ||||
| #else | #else | ||||
| #define SGEMM_DEFAULT_UNROLL_N 2 | #define SGEMM_DEFAULT_UNROLL_N 2 | ||||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | #define SGEMM_DEFAULT_UNROLL_M 16 | ||||
| #define DGEMM_DEFAULT_UNROLL_M 6 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 8 | |||||
| #define QGEMM_DEFAULT_UNROLL_M 2 | #define QGEMM_DEFAULT_UNROLL_M 2 | ||||
| #define CGEMM_DEFAULT_UNROLL_M 4 | #define CGEMM_DEFAULT_UNROLL_M 4 | ||||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | #define ZGEMM_DEFAULT_UNROLL_M 2 | ||||
| @@ -347,7 +347,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(ARCH_X86_64) | #if defined(ARCH_X86_64) | ||||
| #define SGEMM_DEFAULT_P 768 | #define SGEMM_DEFAULT_P 768 | ||||
| #define DGEMM_DEFAULT_P 480 | |||||
| #define DGEMM_DEFAULT_P 384 | |||||
| #else | #else | ||||
| #define SGEMM_DEFAULT_P 448 | #define SGEMM_DEFAULT_P 448 | ||||
| #define DGEMM_DEFAULT_P 480 | #define DGEMM_DEFAULT_P 480 | ||||
| @@ -359,7 +359,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(ARCH_X86_64) | #if defined(ARCH_X86_64) | ||||
| #define SGEMM_DEFAULT_Q 168 | #define SGEMM_DEFAULT_Q 168 | ||||
| #define DGEMM_DEFAULT_Q 128 | |||||
| #define DGEMM_DEFAULT_Q 168 | |||||
| #else | #else | ||||
| #define SGEMM_DEFAULT_Q 224 | #define SGEMM_DEFAULT_Q 224 | ||||
| #define DGEMM_DEFAULT_Q 224 | #define DGEMM_DEFAULT_Q 224 | ||||
| @@ -371,7 +371,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define SGEMM_DEFAULT_R sgemm_r | #define SGEMM_DEFAULT_R sgemm_r | ||||
| #define QGEMM_DEFAULT_R qgemm_r | #define QGEMM_DEFAULT_R qgemm_r | ||||
| #define DGEMM_DEFAULT_R dgemm_r | |||||
| #define DGEMM_DEFAULT_R 12288 | |||||
| #define CGEMM_DEFAULT_R cgemm_r | #define CGEMM_DEFAULT_R cgemm_r | ||||
| #define ZGEMM_DEFAULT_R zgemm_r | #define ZGEMM_DEFAULT_R zgemm_r | ||||
| #define XGEMM_DEFAULT_R xgemm_r | #define XGEMM_DEFAULT_R xgemm_r | ||||