Browse Source

Merge pull request #3288 from martin-frbg/getrf-2

Add lower threshold for multithreading in ?GETRF
tags/v0.3.16^2
Martin Kroeker GitHub 4 years ago
parent
commit
f20c4edc33
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 49 additions and 17 deletions
  1. benchmark/getri.c (+24, -8)
  2. benchmark/linpack.c (+13, -7)
  3. interface/lapack/getrf.c (+8, -1)
  4. interface/lapack/zgetrf.c (+4, -1)

benchmark/getri.c (+24, -8) View File

@@ -72,13 +72,17 @@ int main(int argc, char *argv[]){
FLOAT *a,*work;
FLOAT wkopt[4];
blasint *ipiv;
blasint m, i, j, info,lwork;
blasint m, i, j, l, info,lwork;

int from = 1;
int to = 200;
int step = 1;
int loops = 1;

double time1;
double time1,timeg;
char *p;
char btest = 'I';

argc--;argv++;

@@ -86,6 +90,9 @@ int main(int argc, char *argv[]){
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=*p;

fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);

@@ -124,32 +131,41 @@ int main(int argc, char *argv[]){
fprintf(stderr, " SIZE FLops Time Lwork\n");

for(m = from; m <= to; m += step){
timeg = 0.;
fprintf(stderr, " %6d : ", (int)m);

GETRF (&m, &m, a, &m, ipiv, &info);
for (l = 0; l < loops; l++) {

if (btest == 'F') begin();
GETRF (&m, &m, a, &m, ipiv, &info);
if (btest == 'F') {
end();
timeg += getsec();
}
if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1);
}

begin();
if (btest == 'I') begin();

lwork = -1;
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);

lwork = (blasint)wkopt[0];
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
end();
if (btest == 'I') end();

if (info) {
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
exit(1);
}

time1 = getsec();

if (btest == 'I')
timeg += getsec();
} // loops
time1 = timeg/(double)loops;
fprintf(stderr,
" %10.2f MFlops : %10.2f Sec : %d\n",
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork);


benchmark/linpack.c (+13, -7) View File

@@ -72,17 +72,21 @@ int main(int argc, char *argv[]){
FLOAT *a, *b;
blasint *ipiv;

blasint m, i, j, info;
blasint m, i, j, l, info;
blasint unit = 1;

int from = 1;
int to = 200;
int step = 1;
int loops = 1;

FLOAT maxerr;

double time1, time2;
double time1, time2, timeg1,timeg2;

char *p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=*p;
argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
@@ -110,9 +114,9 @@ int main(int argc, char *argv[]){
fprintf(stderr, " SIZE Residual Decompose Solve Total\n");

for(m = from; m <= to; m += step){
timeg1 = timeg2 = 0.;
fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++) {
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
@@ -138,7 +142,7 @@ int main(int argc, char *argv[]){
exit(1);
}

time1 = getsec();
timeg1 += getsec();

begin();

@@ -151,8 +155,10 @@ int main(int argc, char *argv[]){
exit(1);
}

time2 = getsec();

timeg2 += getsec();
} //loops
time1=timeg1/(double)loops;
time2=timeg2/(double)loops;
maxerr = 0.;

for(i = 0; i < m; i++){


interface/lapack/getrf.c (+8, -1) View File

@@ -95,7 +95,14 @@ int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint

#ifdef SMP
args.common = NULL;
args.nthreads = num_cpu_avail(4);
#ifndef DOUBLE
if (args.m*args.n < 40000)
#else
if (args.m*args.n < 10000)
#endif
args.nthreads=1;
else
args.nthreads = num_cpu_avail(4);

if (args.nthreads == 1) {
#endif


interface/lapack/zgetrf.c (+4, -1) View File

@@ -95,7 +95,10 @@ int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint

#ifdef SMP
args.common = NULL;
args.nthreads = num_cpu_avail(4);
if (args.m*args.n <10000)
args.nthreads = 1;
else
args.nthreads = num_cpu_avail(4);

if (args.nthreads == 1) {
#endif


Loading…
Cancel
Save