Disable repeated recursion on Ab_BR in ReLAPACK xGBTRFtags/v0.3.6^2
| @@ -221,7 +221,9 @@ static void RELAPACK_cgbtrf_rec( | |||
| } | |||
| // recursion(Ab_BR, ipiv_B) | |||
| RELAPACK_cgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); | |||
| //RELAPACK_cgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); | |||
| LAPACK(cgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); | |||
| if (*info) | |||
| *info += n1; | |||
| // shift pivots | |||
| @@ -1,5 +1,6 @@ | |||
| #include "relapack.h" | |||
| #include "stdlib.h" | |||
| #include <stdlib.h> | |||
| #include <stdio.h> | |||
| static void RELAPACK_dgbtrf_rec(const blasint *, const blasint *, const blasint *, | |||
| const blasint *, double *, const blasint *, blasint *, double *, const blasint *, double *, | |||
| const blasint *, blasint *); | |||
| @@ -218,7 +219,8 @@ static void RELAPACK_dgbtrf_rec( | |||
| } | |||
| // recursion(Ab_BR, ipiv_B) | |||
| RELAPACK_dgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); | |||
| // RELAPACK_dgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); | |||
| LAPACK(dgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); | |||
| if (*info) | |||
| *info += n1; | |||
| // shift pivots | |||
| @@ -27,7 +27,7 @@ void RELAPACK_sgbtrf( | |||
| *info = -3; | |||
| else if (*ku < 0) | |||
| *info = -4; | |||
| else if (*ldAb < 2 * *kl + *ku + 1) | |||
| else if (*ldAb < 2 * *kl + *ku + 1) | |||
| *info = -6; | |||
| if (*info) { | |||
| const blasint minfo = -*info; | |||
| @@ -55,15 +55,16 @@ void RELAPACK_sgbtrf( | |||
| // Allocate work space | |||
| const blasint n1 = SREC_SPLIT(*n); | |||
| const blasint mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv; | |||
| const blasint nWorkl = (kv > n1) ? n1 : kv; | |||
| const blasint mWorku = (*kl > n1) ? n1 : *kl; | |||
| const blasint nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl; | |||
| const blasint mWorkl = abs( (kv > n1) ? MAX(1, *m - *kl) : kv ); | |||
| const blasint nWorkl = abs( (kv > n1) ? n1 : kv ); | |||
| const blasint mWorku = abs( (*kl > n1) ? n1 : *kl ); | |||
| const blasint nWorku = abs( (*kl > n1) ? MAX(0, *n - *kl) : *kl ); | |||
| float *Workl = malloc(mWorkl * nWorkl * sizeof(float)); | |||
| float *Worku = malloc(mWorku * nWorku * sizeof(float)); | |||
| LAPACK(slaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl); | |||
| LAPACK(slaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku); | |||
| // Recursive kernel | |||
| RELAPACK_sgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info); | |||
| @@ -81,6 +82,7 @@ static void RELAPACK_sgbtrf_rec( | |||
| blasint *info | |||
| ) { | |||
| if (*n <= MAX(CROSSOVER_SGBTRF, 1)) { | |||
| // Unblocked | |||
| LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); | |||
| @@ -127,7 +129,7 @@ static void RELAPACK_sgbtrf_rec( | |||
| float *const A_BR = A + *ldA * n1 + m1; | |||
| // ipiv_T | |||
| // ipiv_B | |||
| // ipiv_B | |||
| blasint *const ipiv_T = ipiv; | |||
| blasint *const ipiv_B = ipiv + n1; | |||
| @@ -155,6 +157,7 @@ static void RELAPACK_sgbtrf_rec( | |||
| float *const A_BRbl = A_BR + m21; | |||
| float *const A_BRbr = A_BR + *ldA * n21 + m21; | |||
| // recursion(Ab_L, ipiv_T) | |||
| RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); | |||
| @@ -216,8 +219,11 @@ static void RELAPACK_sgbtrf_rec( | |||
| } | |||
| } | |||
| // recursion(Ab_BR, ipiv_B) | |||
| RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); | |||
| //cause of infinite recursion here ? | |||
| // RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); | |||
| LAPACK(sgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); | |||
| if (*info) | |||
| *info += n1; | |||
| // shift pivots | |||
| @@ -56,10 +56,10 @@ void RELAPACK_zgbtrf( | |||
| // Allocate work space | |||
| const blasint n1 = ZREC_SPLIT(*n); | |||
| const blasint mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv; | |||
| const blasint nWorkl = (kv > n1) ? n1 : kv; | |||
| const blasint mWorku = (*kl > n1) ? n1 : *kl; | |||
| const blasint nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl; | |||
| const blasint mWorkl = abs ( (kv > n1) ? MAX(1, *m - *kl) : kv); | |||
| const blasint nWorkl = abs ( (kv > n1) ? n1 : kv); | |||
| const blasint mWorku = abs ( (*kl > n1) ? n1 : *kl); | |||
| const blasint nWorku = abs ( (*kl > n1) ? MAX(0, *n - *kl) : *kl); | |||
| double *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(double)); | |||
| double *Worku = malloc(mWorku * nWorku * 2 * sizeof(double)); | |||
| LAPACK(zlaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl); | |||
| @@ -221,7 +221,9 @@ static void RELAPACK_zgbtrf_rec( | |||
| } | |||
| // recursion(Ab_BR, ipiv_B) | |||
| RELAPACK_zgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); | |||
| // RELAPACK_zgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); | |||
| LAPACK(zgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); | |||
| if (*info) | |||
| *info += n1; | |||
| // shift pivots | |||