Browse Source

Merge pull request #4087 from martin-frbg/lapack847

Improve variants of Cholesky and QR (Reference-LAPACK PR 847)
tags/v0.3.24
Martin Kroeker GitHub 2 years ago
parent
commit
eb058c2ae2
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 29 additions and 119 deletions
  1. +1
    -1
      lapack-netlib/SRC/VARIANTS/Makefile
  2. +1
    -1
      lapack-netlib/SRC/VARIANTS/cholesky/RL/cpotrf.f
  3. +1
    -1
      lapack-netlib/SRC/VARIANTS/cholesky/RL/zpotrf.f
  4. +2
    -2
      lapack-netlib/SRC/VARIANTS/cholesky/TOP/cpotrf.f
  5. +2
    -2
      lapack-netlib/SRC/VARIANTS/cholesky/TOP/zpotrf.f
  6. +6
    -7
      lapack-netlib/SRC/VARIANTS/qr/LL/cgeqrf.f
  7. +5
    -6
      lapack-netlib/SRC/VARIANTS/qr/LL/dgeqrf.f
  8. +0
    -86
      lapack-netlib/SRC/VARIANTS/qr/LL/sceil.f
  9. +5
    -6
      lapack-netlib/SRC/VARIANTS/qr/LL/sgeqrf.f
  10. +6
    -7
      lapack-netlib/SRC/VARIANTS/qr/LL/zgeqrf.f

+ 1
- 1
lapack-netlib/SRC/VARIANTS/Makefile View File

@@ -28,7 +28,7 @@ LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o

LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o

QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o
QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o


.PHONY: all


+ 1
- 1
lapack-netlib/SRC/VARIANTS/cholesky/RL/cpotrf.f View File

@@ -24,7 +24,7 @@ C> \brief \b CPOTRF VARIANT: right looking block version of the algorithm, calli
C>\details \b Purpose:
C>\verbatim
C>
C> CPOTRF computes the Cholesky factorization of a real Hermitian
C> CPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form


+ 1
- 1
lapack-netlib/SRC/VARIANTS/cholesky/RL/zpotrf.f View File

@@ -24,7 +24,7 @@ C> \brief \b ZPOTRF VARIANT: right looking block version of the algorithm, calli
C>\details \b Purpose:
C>\verbatim
C>
C> ZPOTRF computes the Cholesky factorization of a real Hermitian
C> ZPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form


+ 2
- 2
lapack-netlib/SRC/VARIANTS/cholesky/TOP/cpotrf.f View File

@@ -24,7 +24,7 @@ C> \brief \b CPOTRF VARIANT: top-looking block version of the algorithm, calling
C>\details \b Purpose:
C>\verbatim
C>
C> CPOTRF computes the Cholesky factorization of a real symmetric
C> CPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form
@@ -55,7 +55,7 @@ C>
C> \param[in,out] A
C> \verbatim
C> A is COMPLEX array, dimension (LDA,N)
C> On entry, the symmetric matrix A. If UPLO = 'U', the leading
C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
C> N-by-N upper triangular part of A contains the upper
C> triangular part of the matrix A, and the strictly lower
C> triangular part of A is not referenced. If UPLO = 'L', the


+ 2
- 2
lapack-netlib/SRC/VARIANTS/cholesky/TOP/zpotrf.f View File

@@ -24,7 +24,7 @@ C> \brief \b ZPOTRF VARIANT: top-looking block version of the algorithm, calling
C>\details \b Purpose:
C>\verbatim
C>
C> ZPOTRF computes the Cholesky factorization of a real symmetric
C> ZPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form
@@ -55,7 +55,7 @@ C>
C> \param[in,out] A
C> \verbatim
C> A is COMPLEX*16 array, dimension (LDA,N)
C> On entry, the symmetric matrix A. If UPLO = 'U', the leading
C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
C> N-by-N upper triangular part of A contains the upper
C> triangular part of the matrix A, and the strictly lower
C> triangular part of A is not referenced. If UPLO = 'L', the


+ 6
- 7
lapack-netlib/SRC/VARIANTS/qr/LL/cgeqrf.f View File

@@ -23,7 +23,7 @@ C> \brief \b CGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.
C>\details \b Purpose:
C>\verbatim
C>
C> CGEQRF computes a QR factorization of a real M-by-N matrix A:
C> CGEQRF computes a QR factorization of a complex M-by-N matrix A:
C> A = Q * R.
C>
C> This is the left-looking Level 3 BLAS version of the algorithm.
@@ -172,12 +172,11 @@ C>
EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..

@@ -205,13 +204,13 @@ C>
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB

*
* optimal workspace = space for clarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))

IF( K.EQ.0 ) THEN

@@ -230,7 +229,7 @@ C>

ELSE

LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT



+ 5
- 6
lapack-netlib/SRC/VARIANTS/qr/LL/dgeqrf.f View File

@@ -172,12 +172,11 @@ C>
EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..

@@ -205,13 +204,13 @@ C>
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB

*
* optimal workspace = space for dlarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))

IF( K.EQ.0 ) THEN

@@ -230,7 +229,7 @@ C>

ELSE

LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT



+ 0
- 86
lapack-netlib/SRC/VARIANTS/qr/LL/sceil.f View File

@@ -1,86 +0,0 @@
C> \brief \b SCEIL
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* REAL FUNCTION SCEIL( A )
*
* .. Scalar Arguments ..
* REAL A
* ..
*
* =====================================================================
*
* .. Intrinsic Functions ..
* INTRINSIC INT
* ..
* .. Executable Statements ..*
*
* IF (A-INT(A).EQ.0) THEN
* SCEIL = A
* ELSE IF (A.GT.0) THEN
* SCEIL = INT(A)+1;
* ELSE
* SCEIL = INT(A)
* END IF
*
* RETURN
*
* END
* Purpose
* =======
*
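C>\details \b Purpose:
C>\verbatim
C>
C> SCEIL returns the ceiling of A, i.e. the smallest whole number
C> that is not less than A, as a REAL value.
C>
C>\endverbatim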
*
* Arguments:
* ==========
*
*
* Authors:
* ========
*
C> \author Univ. of Tennessee
C> \author Univ. of California Berkeley
C> \author Univ. of Colorado Denver
C> \author NAG Ltd.
*
C> \date December 2016
*
C> \ingroup variantsOTHERcomputational
*
* =====================================================================
REAL FUNCTION SCEIL( A )
*
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* Purpose: return the ceiling of A (the smallest whole number that is
* not less than A) as a REAL value.
*
* .. Scalar Arguments ..
REAL A
* ..
*
* =====================================================================
*
* .. Intrinsic Functions ..
INTRINSIC AINT
* ..
* .. Executable Statements ..
*
* AINT truncates toward zero and stays in REAL arithmetic, so the
* result does not depend on A fitting into a default INTEGER
* (INT(A) is not defined when A lies outside the INTEGER range).
*
IF (A-AINT(A).EQ.0) THEN
* A is already a whole number.
SCEIL = A
ELSE IF (A.GT.0) THEN
* Positive non-integer: truncation rounded down, so step up by one.
SCEIL = AINT(A)+1
ELSE
* Negative non-integer: truncation toward zero is already the ceiling.
SCEIL = AINT(A)
END IF

RETURN
*
END

+ 5
- 6
lapack-netlib/SRC/VARIANTS/qr/LL/sgeqrf.f View File

@@ -172,12 +172,11 @@ C>
EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..

@@ -205,13 +204,13 @@ C>
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB

*
* optimal workspace = space for slarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))

IF( K.EQ.0 ) THEN

@@ -230,7 +229,7 @@ C>

ELSE

LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT



+ 6
- 7
lapack-netlib/SRC/VARIANTS/qr/LL/zgeqrf.f View File

@@ -23,7 +23,7 @@ C> \brief \b ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm.
C>\details \b Purpose:
C>\verbatim
C>
C> ZGEQRF computes a QR factorization of a real M-by-N matrix A:
C> ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
C> A = Q * R.
C>
C> This is the left-looking Level 3 BLAS version of the algorithm.
@@ -172,12 +172,11 @@ C>
EXTERNAL ZGEQR2, ZLARFB, ZLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..

@@ -205,13 +204,13 @@ C>
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB

*
* optimal workspace = space for zlarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))

IF( K.EQ.0 ) THEN

@@ -230,7 +229,7 @@ C>

ELSE

LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT



Loading…
Cancel
Save