Browse Source

Merge f1aaf0777a into 0ab5bf1746

pull/1467/merge
Andrew (committed via GitHub) 8 years ago
parent
commit
83b0712602
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 35 additions and 47 deletions
  1. +1
    -1
      benchmark/gemm.c
  2. +12
    -24
      kernel/generic/ztrmm_ltcopy_8.c
  3. +10
    -10
      kernel/x86_64/dtrmm_kernel_4x8_haswell.c
  4. +6
    -6
      lapack/getrf/getrf_parallel.c
  5. +3
    -3
      lapack/trtri/trtri_L_parallel.c
  6. +3
    -3
      lapack/trtri/trtri_U_parallel.c

+ 1
- 1
benchmark/gemm.c View File

@@ -237,7 +237,7 @@ int main(int argc, char *argv[]){
timeg = time1/loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
COMPSIZE * COMPSIZE * (2.*(double)k+2.) * (double)m * (double)n / timeg * 1.e-6, time1);
}



+ 12
- 24
kernel/generic/ztrmm_ltcopy_8.c View File

@@ -801,32 +801,20 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON

i = (m & 1);
if (i > 0) {
if (X > posY) {
/* a01 += 2;
a02 += 2; */
b += 4;
} else
if (X < posY) {
b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1);
b[ 2] = *(a01 + 2);
b[ 3] = *(a01 + 3);

/* a01 += lda;
a02 += lda; */
b += 4;
} else {
#ifdef UNIT
b[ 0] = ONE;
b[ 1] = ZERO;
#else
b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1);
if (X < posY) {
#endif
b[ 2] = *(a01 + 2);
b[ 3] = *(a01 + 3);
b += 4;
}
b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1);
#ifdef UNIT
} else {
b[ 0] = ONE;
b[ 1] = ZERO;
}
#endif
b[ 2] = *(a01 + 2);
b[ 3] = *(a01 + 3);
b += 4;
}
posY += 2;
}


+ 10
- 10
kernel/x86_64/dtrmm_kernel_4x8_haswell.c View File

@@ -301,7 +301,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
dtrmm_kernel_4x8( temp, &alpha , ptrba, ptrbb, C0, C1, C2, C3, C4, C5, C6, C7);

ptrba = ptrba + temp * 4;
ptrbb = ptrbb + temp * 8;
// ptrbb = ptrbb + temp * 8;

/*
for (k=0; k<temp; k++)
@@ -446,7 +446,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
temp - 8; // number of values in B

ptrba += temp*4; // number of values in A
ptrbb += temp*8; // number of values in B
// ptrbb += temp*8; // number of values in B
}
#ifdef LEFT
off += 4; // number of values in A
@@ -709,14 +709,14 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A
#endif

C0 = C0+1;
/* C0 = C0+1;
C1 = C1+1;
C2 = C2+1;
C3 = C3+1;
C4 = C4+1;
C5 = C5+1;
C6 = C6+1;
C7 = C7+1;
C7 = C7+1; */

}

@@ -862,7 +862,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
temp - 4; // number of values in B */

ptrba += temp*4; // number of values in A
ptrbb += temp*4; // number of values in B
// ptrbb += temp*4; // number of values in B
}
#ifdef LEFT
off += 4; // number of values in A
@@ -1049,10 +1049,10 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A
#endif

C0 = C0+1;
/* C0 = C0+1;
C1 = C1+1;
C2 = C2+1;
C3 = C3+1;
C3 = C3+1; */

}

@@ -1311,8 +1311,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A
#endif

C0 = C0+1;
C1 = C1+1;
/* C0 = C0+1;
C1 = C1+1; */

}

@@ -1532,7 +1532,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A
#endif

C0 = C0+1;
// C0 = C0+1;

}



+ 6
- 6
lapack/getrf/getrf_parallel.c View File

@@ -124,13 +124,13 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
min_jj = js + min_j - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;

if (0 && GEMM_UNROLL_N <= 8) {
/* if (0 && GEMM_UNROLL_N <= 8) {

LASWP_NCOPY(min_jj, off + 1, off + k,
c + (- off + jjs * lda) * COMPSIZE, lda,
ipiv, sbb + k * (jjs - js) * COMPSIZE);

} else {
} else { */

LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
#ifdef COMPLEX
@@ -140,7 +140,7 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra

GEMM_ONCOPY (k, min_jj, c + jjs * lda * COMPSIZE, lda, sbb + (jjs - js) * k * COMPSIZE);

}
// }

for (is = 0; is < k; is += GEMM_P) {
min_i = k - is;
@@ -251,14 +251,14 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
min_jj = MIN(n_to, xxx + div_n) - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;

if (0 && GEMM_UNROLL_N <= 8) {
/* if (0 && GEMM_UNROLL_N <= 8) {
printf("helllo\n");

LASWP_NCOPY(min_jj, off + 1, off + k,
b + (- off + jjs * lda) * COMPSIZE, lda,
ipiv, buffer[bufferside] + (jjs - xxx) * k * COMPSIZE);

} else {
} else { */

LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
#ifdef COMPLEX
@@ -268,7 +268,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *

GEMM_ONCOPY (k, min_jj, b + jjs * lda * COMPSIZE, lda,
buffer[bufferside] + (jjs - xxx) * k * COMPSIZE);
}
// }

for (is = 0; is < k; is += GEMM_P) {
min_i = k - is;


+ 3
- 3
lapack/trtri/trtri_L_parallel.c View File

@@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
BLASLONG n, info;
BLASLONG bk, i, blocking, start_i;
int mode;
BLASLONG lda, range_N[2];
BLASLONG lda;//, range_N[2];
blas_arg_t newarg;
FLOAT *a;
FLOAT alpha[2] = { ONE, ZERO};
@@ -100,8 +100,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
bk = n - i;
if (bk > blocking) bk = blocking;

range_N[0] = i;
range_N[1] = i + bk;
/* range_N[0] = i;
range_N[1] = i + bk; */

newarg.lda = lda;
newarg.ldb = lda;


+ 3
- 3
lapack/trtri/trtri_U_parallel.c View File

@@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
BLASLONG n, info;
BLASLONG bk, i, blocking;
int mode;
BLASLONG lda, range_N[2];
BLASLONG lda; // , range_N[2];
blas_arg_t newarg;
FLOAT *a;
FLOAT alpha[2] = { ONE, ZERO};
@@ -96,8 +96,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
bk = n - i;
if (bk > blocking) bk = blocking;

range_N[0] = i;
range_N[1] = i + bk;
/* range_N[0] = i;
range_N[1] = i + bk; */

newarg.lda = lda;
newarg.ldb = lda;


Loading…
Cancel
Save