Browse Source

Merge pull request #1062 from wernsaar/develop

prepared parameter.c for UNROLL values that are not a power of two
tags/v0.2.20^2
Werner Saar GitHub 9 years ago
parent
commit
752fdc6f82
3 changed files with 8 additions and 8 deletions
  1. +6
    -6
      driver/others/parameter.c
  2. +1
    -1
      lapack/lauum/lauum_L_parallel.c
  3. +1
    -1
      lapack/lauum/lauum_U_parallel.c

+ 6
- 6
driver/others/parameter.c View File

@@ -497,13 +497,13 @@ void blas_set_parameter(void){
if (xgemm_p == 0) xgemm_p = 64;
#endif

-sgemm_p = (sgemm_p + SGEMM_UNROLL_M - 1) & ~(SGEMM_UNROLL_M - 1);
-dgemm_p = (dgemm_p + DGEMM_UNROLL_M - 1) & ~(DGEMM_UNROLL_M - 1);
-cgemm_p = (cgemm_p + CGEMM_UNROLL_M - 1) & ~(CGEMM_UNROLL_M - 1);
-zgemm_p = (zgemm_p + ZGEMM_UNROLL_M - 1) & ~(ZGEMM_UNROLL_M - 1);
+sgemm_p = ((sgemm_p + SGEMM_UNROLL_M - 1)/SGEMM_UNROLL_M) * SGEMM_UNROLL_M;
+dgemm_p = ((dgemm_p + DGEMM_UNROLL_M - 1)/DGEMM_UNROLL_M) * DGEMM_UNROLL_M;
+cgemm_p = ((cgemm_p + CGEMM_UNROLL_M - 1)/CGEMM_UNROLL_M) * CGEMM_UNROLL_M;
+zgemm_p = ((zgemm_p + ZGEMM_UNROLL_M - 1)/ZGEMM_UNROLL_M) * ZGEMM_UNROLL_M;
 #ifdef QUAD_PRECISION
-qgemm_p = (qgemm_p + QGEMM_UNROLL_M - 1) & ~(QGEMM_UNROLL_M - 1);
-xgemm_p = (xgemm_p + XGEMM_UNROLL_M - 1) & ~(XGEMM_UNROLL_M - 1);
+qgemm_p = ((qgemm_p + QGEMM_UNROLL_M - 1)/QGEMM_UNROLL_M) * QGEMM_UNROLL_M;
+xgemm_p = ((xgemm_p + XGEMM_UNROLL_M - 1)/XGEMM_UNROLL_M) * XGEMM_UNROLL_M;
#endif

sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q * 4)) - 15) & ~15;


+ 1
- 1
lapack/lauum/lauum_L_parallel.c View File

@@ -88,7 +88,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.beta = NULL;
newarg.nthreads = args -> nthreads;

-blocking = (n / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+blocking = ((n / 2 + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (blocking > GEMM_Q) blocking = GEMM_Q;

for (i = 0; i < n; i += blocking) {


+ 1
- 1
lapack/lauum/lauum_U_parallel.c View File

@@ -88,7 +88,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.beta = NULL;
newarg.nthreads = args -> nthreads;

-blocking = (n / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+blocking = ((n / 2 + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (blocking > GEMM_Q) blocking = GEMM_Q;

for (i = 0; i < n; i += blocking) {


Loading…
Cancel
Save