Improve variants of Cholesky and QR (Reference-LAPACK PR 847) tags/v0.3.24
| @@ -28,7 +28,7 @@ LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o | |||
| LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o | |||
| QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o | |||
| QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o | |||
| .PHONY: all | |||
| @@ -24,7 +24,7 @@ C> \brief \b CPOTRF VARIANT: right looking block version of the algorithm, calli | |||
| C>\details \b Purpose: | |||
| C>\verbatim | |||
| C> | |||
| C> CPOTRF computes the Cholesky factorization of a real Hermitian | |||
| C> CPOTRF computes the Cholesky factorization of a complex Hermitian | |||
| C> positive definite matrix A. | |||
| C> | |||
| C> The factorization has the form | |||
| @@ -24,7 +24,7 @@ C> \brief \b ZPOTRF VARIANT: right looking block version of the algorithm, calli | |||
| C>\details \b Purpose: | |||
| C>\verbatim | |||
| C> | |||
| C> ZPOTRF computes the Cholesky factorization of a real Hermitian | |||
| C> ZPOTRF computes the Cholesky factorization of a complex Hermitian | |||
| C> positive definite matrix A. | |||
| C> | |||
| C> The factorization has the form | |||
| @@ -24,7 +24,7 @@ C> \brief \b CPOTRF VARIANT: top-looking block version of the algorithm, calling | |||
| C>\details \b Purpose: | |||
| C>\verbatim | |||
| C> | |||
| C> CPOTRF computes the Cholesky factorization of a real symmetric | |||
| C> CPOTRF computes the Cholesky factorization of a complex Hermitian | |||
| C> positive definite matrix A. | |||
| C> | |||
| C> The factorization has the form | |||
| @@ -55,7 +55,7 @@ C> | |||
| C> \param[in,out] A | |||
| C> \verbatim | |||
| C> A is COMPLEX array, dimension (LDA,N) | |||
| C> On entry, the symmetric matrix A. If UPLO = 'U', the leading | |||
| C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading | |||
| C> N-by-N upper triangular part of A contains the upper | |||
| C> triangular part of the matrix A, and the strictly lower | |||
| C> triangular part of A is not referenced. If UPLO = 'L', the | |||
| @@ -24,7 +24,7 @@ C> \brief \b ZPOTRF VARIANT: top-looking block version of the algorithm, calling | |||
| C>\details \b Purpose: | |||
| C>\verbatim | |||
| C> | |||
| C> ZPOTRF computes the Cholesky factorization of a real symmetric | |||
| C> ZPOTRF computes the Cholesky factorization of a complex Hermitian | |||
| C> positive definite matrix A. | |||
| C> | |||
| C> The factorization has the form | |||
| @@ -55,7 +55,7 @@ C> | |||
| C> \param[in,out] A | |||
| C> \verbatim | |||
| C> A is COMPLEX*16 array, dimension (LDA,N) | |||
| C> On entry, the symmetric matrix A. If UPLO = 'U', the leading | |||
| C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading | |||
| C> N-by-N upper triangular part of A contains the upper | |||
| C> triangular part of the matrix A, and the strictly lower | |||
| C> triangular part of A is not referenced. If UPLO = 'L', the | |||
| @@ -23,7 +23,7 @@ C> \brief \b CGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm. | |||
| C>\details \b Purpose: | |||
| C>\verbatim | |||
| C> | |||
| C> CGEQRF computes a QR factorization of a real M-by-N matrix A: | |||
| C> CGEQRF computes a QR factorization of a complex M-by-N matrix A: | |||
| C> A = Q * R. | |||
| C> | |||
| C> This is the left-looking Level 3 BLAS version of the algorithm. | |||
| @@ -172,12 +172,11 @@ C> | |||
| EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA | |||
| * .. | |||
| * .. Intrinsic Functions .. | |||
| INTRINSIC MAX, MIN | |||
| INTRINSIC CEILING, MAX, MIN, REAL | |||
| * .. | |||
| * .. External Functions .. | |||
| INTEGER ILAENV | |||
| REAL SCEIL | |||
| EXTERNAL ILAENV, SCEIL | |||
| EXTERNAL ILAENV | |||
| * .. | |||
| * .. Executable Statements .. | |||
| @@ -205,13 +204,13 @@ C> | |||
| * | |||
| * So here 4 x 4 is the last T stored in the workspace | |||
| * | |||
| NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB | |||
| NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB | |||
| * | |||
| * optimal workspace = space for dlarfb + space for normal T's + space for the last T | |||
| * | |||
| LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB)) | |||
| LLWORK = SCEIL(REAL(LLWORK)/REAL(NB)) | |||
| LLWORK = CEILING(REAL(LLWORK)/REAL(NB)) | |||
| IF( K.EQ.0 ) THEN | |||
| @@ -230,7 +229,7 @@ C> | |||
| ELSE | |||
| LBWORK = SCEIL(REAL(K)/REAL(NB))*NB | |||
| LBWORK = CEILING(REAL(K)/REAL(NB))*NB | |||
| LWKOPT = (LBWORK+LLWORK-NB)*NB | |||
| WORK( 1 ) = LWKOPT | |||
| @@ -172,12 +172,11 @@ C> | |||
| EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA | |||
| * .. | |||
| * .. Intrinsic Functions .. | |||
| INTRINSIC MAX, MIN | |||
| INTRINSIC CEILING, MAX, MIN, REAL | |||
| * .. | |||
| * .. External Functions .. | |||
| INTEGER ILAENV | |||
| REAL SCEIL | |||
| EXTERNAL ILAENV, SCEIL | |||
| EXTERNAL ILAENV | |||
| * .. | |||
| * .. Executable Statements .. | |||
| @@ -205,13 +204,13 @@ C> | |||
| * | |||
| * So here 4 x 4 is the last T stored in the workspace | |||
| * | |||
| NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB | |||
| NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB | |||
| * | |||
| * optimal workspace = space for dlarfb + space for normal T's + space for the last T | |||
| * | |||
| LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB)) | |||
| LLWORK = SCEIL(REAL(LLWORK)/REAL(NB)) | |||
| LLWORK = CEILING(REAL(LLWORK)/REAL(NB)) | |||
| IF( K.EQ.0 ) THEN | |||
| @@ -230,7 +229,7 @@ C> | |||
| ELSE | |||
| LBWORK = SCEIL(REAL(K)/REAL(NB))*NB | |||
| LBWORK = CEILING(REAL(K)/REAL(NB))*NB | |||
| LWKOPT = (LBWORK+LLWORK-NB)*NB | |||
| WORK( 1 ) = LWKOPT | |||
| @@ -1,86 +0,0 @@ | |||
| C> \brief \b SCEIL | |||
| * | |||
| * =========== DOCUMENTATION =========== | |||
| * | |||
| * Online html documentation available at | |||
| * http://www.netlib.org/lapack/explore-html/ | |||
| * | |||
| * Definition: | |||
| * =========== | |||
| * | |||
| * REAL FUNCTION SCEIL( A ) | |||
| * | |||
| * .. Scalar Arguments .. | |||
| * REAL A | |||
| * .. | |||
| * | |||
| * ===================================================================== | |||
| * | |||
| * .. Intrinsic Functions .. | |||
| * INTRINSIC INT | |||
| * .. | |||
| * .. Executable Statements ..* | |||
| * | |||
| * IF (A-INT(A).EQ.0) THEN | |||
| * SCEIL = A | |||
| * ELSE IF (A.GT.0) THEN | |||
| * SCEIL = INT(A)+1; | |||
| * ELSE | |||
| * SCEIL = INT(A) | |||
| * END IF | |||
| * | |||
| * RETURN | |||
| * | |||
| * END | |||
| * Purpose | |||
| * ======= | |||
| * | |||
| C>\details \b Purpose: | |||
| C>\verbatim | |||
| C>\endverbatim | |||
| * | |||
| * Arguments: | |||
| * ========== | |||
| * | |||
| * | |||
| * Authors: | |||
| * ======== | |||
| * | |||
| C> \author Univ. of Tennessee | |||
| C> \author Univ. of California Berkeley | |||
| C> \author Univ. of Colorado Denver | |||
| C> \author NAG Ltd. | |||
| * | |||
| C> \date December 2016 | |||
| * | |||
| C> \ingroup variantsOTHERcomputational | |||
| * | |||
| * ===================================================================== | |||
| REAL FUNCTION SCEIL( A ) | |||
| * | |||
| * -- LAPACK computational routine -- | |||
| * -- LAPACK is a software package provided by Univ. of Tennessee, -- | |||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
| * | |||
| * .. Scalar Arguments ..* | |||
| REAL A | |||
| * .. | |||
| * | |||
| * ===================================================================== | |||
| * | |||
| * .. Intrinsic Functions .. | |||
| INTRINSIC INT | |||
| * .. | |||
| * .. Executable Statements ..* | |||
| * | |||
| IF (A-INT(A).EQ.0) THEN | |||
| SCEIL = A | |||
| ELSE IF (A.GT.0) THEN | |||
| SCEIL = INT(A)+1; | |||
| ELSE | |||
| SCEIL = INT(A) | |||
| END IF | |||
| RETURN | |||
| * | |||
| END | |||
| @@ -172,12 +172,11 @@ C> | |||
| EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA | |||
| * .. | |||
| * .. Intrinsic Functions .. | |||
| INTRINSIC MAX, MIN | |||
| INTRINSIC CEILING, MAX, MIN, REAL | |||
| * .. | |||
| * .. External Functions .. | |||
| INTEGER ILAENV | |||
| REAL SCEIL | |||
| EXTERNAL ILAENV, SCEIL | |||
| EXTERNAL ILAENV | |||
| * .. | |||
| * .. Executable Statements .. | |||
| @@ -205,13 +204,13 @@ C> | |||
| * | |||
| * So here 4 x 4 is the last T stored in the workspace | |||
| * | |||
| NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB | |||
| NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB | |||
| * | |||
| * optimal workspace = space for dlarfb + space for normal T's + space for the last T | |||
| * | |||
| LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB)) | |||
| LLWORK = SCEIL(REAL(LLWORK)/REAL(NB)) | |||
| LLWORK = CEILING(REAL(LLWORK)/REAL(NB)) | |||
| IF( K.EQ.0 ) THEN | |||
| @@ -230,7 +229,7 @@ C> | |||
| ELSE | |||
| LBWORK = SCEIL(REAL(K)/REAL(NB))*NB | |||
| LBWORK = CEILING(REAL(K)/REAL(NB))*NB | |||
| LWKOPT = (LBWORK+LLWORK-NB)*NB | |||
| WORK( 1 ) = LWKOPT | |||
| @@ -23,7 +23,7 @@ C> \brief \b ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm. | |||
| C>\details \b Purpose: | |||
| C>\verbatim | |||
| C> | |||
| C> ZGEQRF computes a QR factorization of a real M-by-N matrix A: | |||
| C> ZGEQRF computes a QR factorization of a complex M-by-N matrix A: | |||
| C> A = Q * R. | |||
| C> | |||
| C> This is the left-looking Level 3 BLAS version of the algorithm. | |||
| @@ -172,12 +172,11 @@ C> | |||
| EXTERNAL ZGEQR2, ZLARFB, ZLARFT, XERBLA | |||
| * .. | |||
| * .. Intrinsic Functions .. | |||
| INTRINSIC MAX, MIN | |||
| INTRINSIC CEILING, MAX, MIN, REAL | |||
| * .. | |||
| * .. External Functions .. | |||
| INTEGER ILAENV | |||
| REAL SCEIL | |||
| EXTERNAL ILAENV, SCEIL | |||
| EXTERNAL ILAENV | |||
| * .. | |||
| * .. Executable Statements .. | |||
| @@ -205,13 +204,13 @@ C> | |||
| * | |||
| * So here 4 x 4 is the last T stored in the workspace | |||
| * | |||
| NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB | |||
| NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB | |||
| * | |||
| * optimal workspace = space for dlarfb + space for normal T's + space for the last T | |||
| * | |||
| LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB)) | |||
| LLWORK = SCEIL(REAL(LLWORK)/REAL(NB)) | |||
| LLWORK = CEILING(REAL(LLWORK)/REAL(NB)) | |||
| IF( K.EQ.0 ) THEN | |||
| @@ -230,7 +229,7 @@ C> | |||
| ELSE | |||
| LBWORK = SCEIL(REAL(K)/REAL(NB))*NB | |||
| LBWORK = CEILING(REAL(K)/REAL(NB))*NB | |||
| LWKOPT = (LBWORK+LLWORK-NB)*NB | |||
| WORK( 1 ) = LWKOPT | |||