Update LAPACK to 3.9.0

6 years ago · a421ab9ce2
--- a/lapack-netlib/SRC/dlaed4.f
+++ b/lapack-netlib/SRC/dlaed4.f
@@ -82,7 +82,7 @@
 *> \param[out] DELTA
 *> \verbatim
 *>          DELTA is DOUBLE PRECISION array, dimension (N)
 *>         If N .GT. 2, DELTA contains (D(j) - lambda_I) in its  j-th
 *>         If N > 2, DELTA contains (D(j) - lambda_I) in its  j-th
 *>         component.  If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5
 *>         for detail. The vector DELTA contains the information necessary
 *>         to construct the eigenvectors by DLAED3 and DLAED9.
--- a/lapack-netlib/SRC/dlaed8.f
+++ b/lapack-netlib/SRC/dlaed8.f
@@ -353,7 +353,7 @@
         Z( I ) = W( INDX( I ) )
   40 CONTINUE
 *
 *     Calculate the allowable deflation tolerence
 *     Calculate the allowable deflation tolerance
 *
      IMAX = IDAMAX( N, Z, 1 )
      JMAX = IDAMAX( N, D, 1 )
--- a/lapack-netlib/SRC/dlagtf.f
+++ b/lapack-netlib/SRC/dlagtf.f
@@ -125,7 +125,7 @@
 *>          then IN(k) = 1, otherwise IN(k) = 0. The element IN(n)
 *>          returns the smallest positive integer j such that
 *>
 *>             abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL,
 *>             abs( u(j,j) ) <= norm( (T - lambda*I)(j) )*TOL,
 *>
 *>          where norm( A(j) ) denotes the sum of the absolute values of
 *>          the jth row of the matrix A. If no such j exists then IN(n)
@@ -137,8 +137,8 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0   : successful exit
 *>          .lt. 0: if INFO = -k, the kth argument had an illegal value
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -k, the kth argument had an illegal value
 *> \endverbatim
 *
 *  Authors:
--- a/lapack-netlib/SRC/dlagts.f
+++ b/lapack-netlib/SRC/dlagts.f
@@ -122,12 +122,12 @@
 *> \param[in,out] TOL
 *> \verbatim
 *>          TOL is DOUBLE PRECISION
 *>          On entry, with  JOB .lt. 0, TOL should be the minimum
 *>          On entry, with  JOB < 0, TOL should be the minimum
 *>          perturbation to be made to very small diagonal elements of U.
 *>          TOL should normally be chosen as about eps*norm(U), where eps
 *>          is the relative machine precision, but if TOL is supplied as
 *>          non-positive, then it is reset to eps*max( abs( u(i,j) ) ).
 *>          If  JOB .gt. 0  then TOL is not referenced.
 *>          If  JOB > 0  then TOL is not referenced.
 *>
 *>          On exit, TOL is changed as described above, only if TOL is
 *>          non-positive on entry. Otherwise TOL is unchanged.
@@ -136,14 +136,14 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0   : successful exit
 *>          .lt. 0: if INFO = -i, the i-th argument had an illegal value
 *>          .gt. 0: overflow would occur when computing the INFO(th)
 *>                  element of the solution vector x. This can only occur
 *>                  when JOB is supplied as positive and either means
 *>                  that a diagonal element of U is very small, or that
 *>                  the elements of the right-hand side vector y are very
 *>                  large.
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *>          > 0:  overflow would occur when computing the INFO(th)
 *>                element of the solution vector x. This can only occur
 *>                when JOB is supplied as positive and either means
 *>                that a diagonal element of U is very small, or that
 *>                the elements of the right-hand side vector y are very
 *>                large.
 *> \endverbatim
 *
 *  Authors:
--- a/lapack-netlib/SRC/dlahqr.f
+++ b/lapack-netlib/SRC/dlahqr.f
@@ -150,26 +150,26 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>           =   0: successful exit
 *>          .GT. 0: If INFO = i, DLAHQR failed to compute all the
 *>           = 0:  successful exit
 *>           > 0:  If INFO = i, DLAHQR failed to compute all the
 *>                  eigenvalues ILO to IHI in a total of 30 iterations
 *>                  per eigenvalue; elements i+1:ihi of WR and WI
 *>                  contain those eigenvalues which have been
 *>                  successfully computed.
 *>
 *>                  If INFO .GT. 0 and WANTT is .FALSE., then on exit,
 *>                  If INFO > 0 and WANTT is .FALSE., then on exit,
 *>                  the remaining unconverged eigenvalues are the
 *>                  eigenvalues of the upper Hessenberg matrix rows
 *>                  and columns ILO thorugh INFO of the final, output
 *>                  and columns ILO through INFO of the final, output
 *>                  value of H.
 *>
 *>                  If INFO .GT. 0 and WANTT is .TRUE., then on exit
 *>                  If INFO > 0 and WANTT is .TRUE., then on exit
 *>          (*)       (initial value of H)*U  = U*(final value of H)
 *>                  where U is an orthognal matrix.    The final
 *>                  where U is an orthogonal matrix.    The final
 *>                  value of H is upper Hessenberg and triangular in
 *>                  rows and columns INFO+1 through IHI.
 *>
 *>                  If INFO .GT. 0 and WANTZ is .TRUE., then on exit
 *>                  If INFO > 0 and WANTZ is .TRUE., then on exit
 *>                      (final value of Z)  = (initial value of Z)*U
 *>                  where U is the orthogonal matrix in (*)
 *>                  (regardless of the value of WANTT.)
--- a/lapack-netlib/SRC/dlaln2.f
+++ b/lapack-netlib/SRC/dlaln2.f
@@ -49,7 +49,7 @@
 *> the first column of each being the real part and the second
 *> being the imaginary part.
 *>
 *> "s" is a scaling factor (.LE. 1), computed by DLALN2, which is
 *> "s" is a scaling factor (<= 1), computed by DLALN2, which is
 *> so chosen that X can be computed without overflow.  X is further
 *> scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
 *> than overflow.
--- a/lapack-netlib/SRC/dlamswlq.f
+++ b/lapack-netlib/SRC/dlamswlq.f
@@ -1,3 +1,4 @@
 *> \brief \b DLAMSWLQ
 *
 *  Definition:
 *  ===========
--- a/lapack-netlib/SRC/dlamtsqr.f
+++ b/lapack-netlib/SRC/dlamtsqr.f
@@ -1,3 +1,4 @@
 *> \brief \b DLAMTSQR
 *
 *  Definition:
 *  ===========
--- a/lapack-netlib/SRC/dlangb.f
+++ b/lapack-netlib/SRC/dlangb.f
@@ -129,6 +129,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      CHARACTER          NORM
      INTEGER            KL, KU, LDAB, N
@@ -139,22 +140,24 @@
 *
 * =====================================================================
 *
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, J, K, L
      DOUBLE PRECISION   SCALE, SUM, VALUE, TEMP
      DOUBLE PRECISION   SUM, VALUE, TEMP
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ
 *     .. Local Arrays ..
      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME, DISNAN
      EXTERNAL           LSAME, DISNAN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ, DCOMBSSQ
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
@@ -206,15 +209,22 @@
      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
 *        SSQ(1) is scale
 *        SSQ(2) is sum-of-squares
 *        For better accuracy, sum each column separately.
 *
         SCALE = ZERO
         SUM = ONE
         SSQ( 1 ) = ZERO
         SSQ( 2 ) = ONE
         DO 90 J = 1, N
            L = MAX( 1, J-KU )
            K = KU + 1 - J + L
            CALL DLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM )
            COLSSQ( 1 ) = ZERO
            COLSSQ( 2 ) = ONE
            CALL DLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1,
     $                   COLSSQ( 1 ), COLSSQ( 2 ) )
            CALL DCOMBSSQ( SSQ, COLSSQ )
   90    CONTINUE
         VALUE = SCALE*SQRT( SUM )
         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
      END IF
 *
      DLANGB = VALUE
--- a/lapack-netlib/SRC/dlange.f
+++ b/lapack-netlib/SRC/dlange.f
@@ -119,6 +119,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      CHARACTER          NORM
      INTEGER            LDA, M, N
@@ -135,10 +136,13 @@
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, J
      DOUBLE PRECISION   SCALE, SUM, VALUE, TEMP
      DOUBLE PRECISION   SUM, VALUE, TEMP
 *     ..
 *     .. Local Arrays ..
      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ
      EXTERNAL           DLASSQ, DCOMBSSQ
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME, DISNAN
@@ -194,13 +198,19 @@
      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
 *        SSQ(1) is scale
 *        SSQ(2) is sum-of-squares
 *        For better accuracy, sum each column separately.
 *
         SCALE = ZERO
         SUM = ONE
         SSQ( 1 ) = ZERO
         SSQ( 2 ) = ONE
         DO 90 J = 1, N
            CALL DLASSQ( M, A( 1, J ), 1, SCALE, SUM )
            COLSSQ( 1 ) = ZERO
            COLSSQ( 2 ) = ONE
            CALL DLASSQ( M, A( 1, J ), 1, COLSSQ( 1 ), COLSSQ( 2 ) )
            CALL DCOMBSSQ( SSQ, COLSSQ )
   90    CONTINUE
         VALUE = SCALE*SQRT( SUM )
         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
      END IF
 *
      DLANGE = VALUE
--- a/lapack-netlib/SRC/dlanhs.f
+++ b/lapack-netlib/SRC/dlanhs.f
@@ -113,6 +113,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      CHARACTER          NORM
      INTEGER            LDA, N
@@ -129,15 +130,18 @@
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, J
      DOUBLE PRECISION   SCALE, SUM, VALUE
      DOUBLE PRECISION   SUM, VALUE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ
 *     .. Local Arrays ..
      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME, DISNAN
      EXTERNAL           LSAME, DISNAN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ, DCOMBSSQ
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MIN, SQRT
 *     ..
@@ -188,13 +192,20 @@
      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
 *        SSQ(1) is scale
 *        SSQ(2) is sum-of-squares
 *        For better accuracy, sum each column separately.
 *
         SCALE = ZERO
         SUM = ONE
         SSQ( 1 ) = ZERO
         SSQ( 2 ) = ONE
         DO 90 J = 1, N
            CALL DLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM )
            COLSSQ( 1 ) = ZERO
            COLSSQ( 2 ) = ONE
            CALL DLASSQ( MIN( N, J+1 ), A( 1, J ), 1,
     $                   COLSSQ( 1 ), COLSSQ( 2 ) )
            CALL DCOMBSSQ( SSQ, COLSSQ )
   90    CONTINUE
         VALUE = SCALE*SQRT( SUM )
         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
      END IF
 *
      DLANHS = VALUE
--- a/lapack-netlib/SRC/dlansb.f
+++ b/lapack-netlib/SRC/dlansb.f
@@ -134,6 +134,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      CHARACTER          NORM, UPLO
      INTEGER            K, LDAB, N
@@ -150,15 +151,18 @@
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, J, L
      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
      DOUBLE PRECISION   ABSA, SUM, VALUE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ
 *     .. Local Arrays ..
      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME, DISNAN
      EXTERNAL           LSAME, DISNAN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ, DCOMBSSQ
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
@@ -225,29 +229,47 @@
      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
 *        SSQ(1) is scale
 *        SSQ(2) is sum-of-squares
 *        For better accuracy, sum each column separately.
 *
         SSQ( 1 ) = ZERO
         SSQ( 2 ) = ONE
 *
 *        Sum off-diagonals
 *
         SCALE = ZERO
         SUM = ONE
         IF( K.GT.0 ) THEN
            IF( LSAME( UPLO, 'U' ) ) THEN
               DO 110 J = 2, N
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
     $                         1, SCALE, SUM )
     $                         1, COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
  110          CONTINUE
               L = K + 1
            ELSE
               DO 120 J = 1, N - 1
                  CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
     $                         SUM )
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1,
     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
  120          CONTINUE
               L = 1
            END IF
            SUM = 2*SUM
            SSQ( 2 ) = 2*SSQ( 2 )
         ELSE
            L = 1
         END IF
         CALL DLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM )
         VALUE = SCALE*SQRT( SUM )
 *
 *        Sum diagonal
 *
         COLSSQ( 1 ) = ZERO
         COLSSQ( 2 ) = ONE
         CALL DLASSQ( N, AB( L, 1 ), LDAB, COLSSQ( 1 ), COLSSQ( 2 ) )
         CALL DCOMBSSQ( SSQ, COLSSQ )
         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
      END IF
 *
      DLANSB = VALUE
--- a/lapack-netlib/SRC/dlansp.f
+++ b/lapack-netlib/SRC/dlansp.f
@@ -119,6 +119,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      CHARACTER          NORM, UPLO
      INTEGER            N
@@ -135,15 +136,18 @@
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, J, K
      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
      DOUBLE PRECISION   ABSA, SUM, VALUE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ
 *     .. Local Arrays ..
      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME, DISNAN
      EXTERNAL           LSAME, DISNAN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ, DCOMBSSQ
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, SQRT
 *     ..
@@ -217,31 +221,48 @@
      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
 *        SSQ(1) is scale
 *        SSQ(2) is sum-of-squares
 *        For better accuracy, sum each column separately.
 *
         SSQ( 1 ) = ZERO
         SSQ( 2 ) = ONE
 *
 *        Sum off-diagonals
 *
         SCALE = ZERO
         SUM = ONE
         K = 2
         IF( LSAME( UPLO, 'U' ) ) THEN
            DO 110 J = 2, N
               CALL DLASSQ( J-1, AP( K ), 1, SCALE, SUM )
               COLSSQ( 1 ) = ZERO
               COLSSQ( 2 ) = ONE
               CALL DLASSQ( J-1, AP( K ), 1, COLSSQ( 1 ), COLSSQ( 2 ) )
               CALL DCOMBSSQ( SSQ, COLSSQ )
               K = K + J
  110       CONTINUE
         ELSE
            DO 120 J = 1, N - 1
               CALL DLASSQ( N-J, AP( K ), 1, SCALE, SUM )
               COLSSQ( 1 ) = ZERO
               COLSSQ( 2 ) = ONE
               CALL DLASSQ( N-J, AP( K ), 1, COLSSQ( 1 ), COLSSQ( 2 ) )
               CALL DCOMBSSQ( SSQ, COLSSQ )
               K = K + N - J + 1
  120       CONTINUE
         END IF
         SUM = 2*SUM
         SSQ( 2 ) = 2*SSQ( 2 )
 *
 *        Sum diagonal
 *
         K = 1
         COLSSQ( 1 ) = ZERO
         COLSSQ( 2 ) = ONE
         DO 130 I = 1, N
            IF( AP( K ).NE.ZERO ) THEN
               ABSA = ABS( AP( K ) )
               IF( SCALE.LT.ABSA ) THEN
                  SUM = ONE + SUM*( SCALE / ABSA )**2
                  SCALE = ABSA
               IF( COLSSQ( 1 ).LT.ABSA ) THEN
                  COLSSQ( 2 ) = ONE + COLSSQ(2)*( COLSSQ(1) / ABSA )**2
                  COLSSQ( 1 ) = ABSA
               ELSE
                  SUM = SUM + ( ABSA / SCALE )**2
                  COLSSQ( 2 ) = COLSSQ( 2 ) + ( ABSA / COLSSQ( 1 ) )**2
               END IF
            END IF
            IF( LSAME( UPLO, 'U' ) ) THEN
@@ -250,7 +271,8 @@
               K = K + N - I + 1
            END IF
  130    CONTINUE
         VALUE = SCALE*SQRT( SUM )
         CALL DCOMBSSQ( SSQ, COLSSQ )
         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
      END IF
 *
      DLANSP = VALUE
--- a/lapack-netlib/SRC/dlansy.f
+++ b/lapack-netlib/SRC/dlansy.f
@@ -127,6 +127,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      CHARACTER          NORM, UPLO
      INTEGER            LDA, N
@@ -143,15 +144,18 @@
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, J
      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
      DOUBLE PRECISION   ABSA, SUM, VALUE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ
 *     .. Local Arrays ..
      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME, DISNAN
      EXTERNAL           LSAME, DISNAN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ, DCOMBSSQ
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, SQRT
 *     ..
@@ -216,21 +220,39 @@
      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
 *        SSQ(1) is scale
 *        SSQ(2) is sum-of-squares
 *        For better accuracy, sum each column separately.
 *
         SSQ( 1 ) = ZERO
         SSQ( 2 ) = ONE
 *
 *        Sum off-diagonals
 *
         SCALE = ZERO
         SUM = ONE
         IF( LSAME( UPLO, 'U' ) ) THEN
            DO 110 J = 2, N
               CALL DLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
               COLSSQ( 1 ) = ZERO
               COLSSQ( 2 ) = ONE
               CALL DLASSQ( J-1, A( 1, J ), 1, COLSSQ(1), COLSSQ(2) )
               CALL DCOMBSSQ( SSQ, COLSSQ )
  110       CONTINUE
         ELSE
            DO 120 J = 1, N - 1
               CALL DLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
               COLSSQ( 1 ) = ZERO
               COLSSQ( 2 ) = ONE
               CALL DLASSQ( N-J, A( J+1, J ), 1, COLSSQ(1), COLSSQ(2) )
               CALL DCOMBSSQ( SSQ, COLSSQ )
  120       CONTINUE
         END IF
         SUM = 2*SUM
         CALL DLASSQ( N, A, LDA+1, SCALE, SUM )
         VALUE = SCALE*SQRT( SUM )
         SSQ( 2 ) = 2*SSQ( 2 )
 *
 *        Sum diagonal
 *
         COLSSQ( 1 ) = ZERO
         COLSSQ( 2 ) = ONE
         CALL DLASSQ( N, A, LDA+1, COLSSQ( 1 ), COLSSQ( 2 ) )
         CALL DCOMBSSQ( SSQ, COLSSQ )
         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
      END IF
 *
      DLANSY = VALUE
--- a/lapack-netlib/SRC/dlantb.f
+++ b/lapack-netlib/SRC/dlantb.f
@@ -145,6 +145,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      CHARACTER          DIAG, NORM, UPLO
      INTEGER            K, LDAB, N
@@ -162,15 +163,18 @@
 *     .. Local Scalars ..
      LOGICAL            UDIAG
      INTEGER            I, J, L
      DOUBLE PRECISION   SCALE, SUM, VALUE
      DOUBLE PRECISION   SUM, VALUE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ
 *     .. Local Arrays ..
      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME, DISNAN
      EXTERNAL           LSAME, DISNAN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ, DCOMBSSQ
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN, SQRT
 *     ..
@@ -311,46 +315,61 @@
      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
 *        SSQ(1) is scale
 *        SSQ(2) is sum-of-squares
 *        For better accuracy, sum each column separately.
 *
         IF( LSAME( UPLO, 'U' ) ) THEN
            IF( LSAME( DIAG, 'U' ) ) THEN
               SCALE = ONE
               SUM = N
               SSQ( 1 ) = ONE
               SSQ( 2 ) = N
               IF( K.GT.0 ) THEN
                  DO 280 J = 2, N
                     COLSSQ( 1 ) = ZERO
                     COLSSQ( 2 ) = ONE
                     CALL DLASSQ( MIN( J-1, K ),
     $                            AB( MAX( K+2-J, 1 ), J ), 1, SCALE,
     $                            SUM )
     $                            AB( MAX( K+2-J, 1 ), J ), 1,
     $                            COLSSQ( 1 ), COLSSQ( 2 ) )
                     CALL DCOMBSSQ( SSQ, COLSSQ )
  280             CONTINUE
               END IF
            ELSE
               SCALE = ZERO
               SUM = ONE
               SSQ( 1 ) = ZERO
               SSQ( 2 ) = ONE
               DO 290 J = 1, N
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ),
     $                         1, SCALE, SUM )
     $                         1, COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
  290          CONTINUE
            END IF
         ELSE
            IF( LSAME( DIAG, 'U' ) ) THEN
               SCALE = ONE
               SUM = N
               SSQ( 1 ) = ONE
               SSQ( 2 ) = N
               IF( K.GT.0 ) THEN
                  DO 300 J = 1, N - 1
                     CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
     $                            SUM )
                     COLSSQ( 1 ) = ZERO
                     COLSSQ( 2 ) = ONE
                     CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1,
     $                            COLSSQ( 1 ), COLSSQ( 2 ) )
                     CALL DCOMBSSQ( SSQ, COLSSQ )
  300             CONTINUE
               END IF
            ELSE
               SCALE = ZERO
               SUM = ONE
               SSQ( 1 ) = ZERO
               SSQ( 2 ) = ONE
               DO 310 J = 1, N
                  CALL DLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE,
     $                         SUM )
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1,
     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
  310          CONTINUE
            END IF
         END IF
         VALUE = SCALE*SQRT( SUM )
         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
      END IF
 *
      DLANTB = VALUE
--- a/lapack-netlib/SRC/dlantp.f
+++ b/lapack-netlib/SRC/dlantp.f
@@ -129,6 +129,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      CHARACTER          DIAG, NORM, UPLO
      INTEGER            N
@@ -146,15 +147,18 @@
 *     .. Local Scalars ..
      LOGICAL            UDIAG
      INTEGER            I, J, K
      DOUBLE PRECISION   SCALE, SUM, VALUE
      DOUBLE PRECISION   SUM, VALUE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ
 *     .. Local Arrays ..
      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME, DISNAN
      EXTERNAL           LSAME, DISNAN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ, DCOMBSSQ
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, SQRT
 *     ..
@@ -306,45 +310,64 @@
      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
 *        SSQ(1) is scale
 *        SSQ(2) is sum-of-squares
 *        For better accuracy, sum each column separately.
 *
         IF( LSAME( UPLO, 'U' ) ) THEN
            IF( LSAME( DIAG, 'U' ) ) THEN
               SCALE = ONE
               SUM = N
               SSQ( 1 ) = ONE
               SSQ( 2 ) = N
               K = 2
               DO 280 J = 2, N
                  CALL DLASSQ( J-1, AP( K ), 1, SCALE, SUM )
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( J-1, AP( K ), 1,
     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
                  K = K + J
  280          CONTINUE
            ELSE
               SCALE = ZERO
               SUM = ONE
               SSQ( 1 ) = ZERO
               SSQ( 2 ) = ONE
               K = 1
               DO 290 J = 1, N
                  CALL DLASSQ( J, AP( K ), 1, SCALE, SUM )
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( J, AP( K ), 1,
     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
                  K = K + J
  290          CONTINUE
            END IF
         ELSE
            IF( LSAME( DIAG, 'U' ) ) THEN
               SCALE = ONE
               SUM = N
               SSQ( 1 ) = ONE
               SSQ( 2 ) = N
               K = 2
               DO 300 J = 1, N - 1
                  CALL DLASSQ( N-J, AP( K ), 1, SCALE, SUM )
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( N-J, AP( K ), 1,
     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
                  K = K + N - J + 1
  300          CONTINUE
            ELSE
               SCALE = ZERO
               SUM = ONE
               SSQ( 1 ) = ZERO
               SSQ( 2 ) = ONE
               K = 1
               DO 310 J = 1, N
                  CALL DLASSQ( N-J+1, AP( K ), 1, SCALE, SUM )
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( N-J+1, AP( K ), 1,
     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
                  K = K + N - J + 1
  310          CONTINUE
            END IF
         END IF
         VALUE = SCALE*SQRT( SUM )
         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
      END IF
 *
      DLANTP = VALUE
--- a/lapack-netlib/SRC/dlantr.f
+++ b/lapack-netlib/SRC/dlantr.f
@@ -146,6 +146,7 @@
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     December 2016
 *
      IMPLICIT NONE
 *     .. Scalar Arguments ..
      CHARACTER          DIAG, NORM, UPLO
      INTEGER            LDA, M, N
@@ -163,15 +164,18 @@
 *     .. Local Scalars ..
      LOGICAL            UDIAG
      INTEGER            I, J
      DOUBLE PRECISION   SCALE, SUM, VALUE
      DOUBLE PRECISION   SUM, VALUE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ
 *     .. Local Arrays ..
      DOUBLE PRECISION   SSQ( 2 ), COLSSQ( 2 )
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME, DISNAN
      EXTERNAL           LSAME, DISNAN
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DLASSQ, DCOMBSSQ
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, MIN, SQRT
 *     ..
@@ -281,7 +285,7 @@
            END IF
         ELSE
            IF( LSAME( DIAG, 'U' ) ) THEN
               DO 210 I = 1, N
               DO 210 I = 1, MIN( M, N )
                  WORK( I ) = ONE
  210          CONTINUE
               DO 220 I = N + 1, M
@@ -311,38 +315,56 @@
      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
 *
 *        Find normF(A).
 *        SSQ(1) is scale
 *        SSQ(2) is sum-of-squares
 *        For better accuracy, sum each column separately.
 *
         IF( LSAME( UPLO, 'U' ) ) THEN
            IF( LSAME( DIAG, 'U' ) ) THEN
               SCALE = ONE
               SUM = MIN( M, N )
               SSQ( 1 ) = ONE
               SSQ( 2 ) = MIN( M, N )
               DO 290 J = 2, N
                  CALL DLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM )
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( MIN( M, J-1 ), A( 1, J ), 1,
     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
  290          CONTINUE
            ELSE
               SCALE = ZERO
               SUM = ONE
               SSQ( 1 ) = ZERO
               SSQ( 2 ) = ONE
               DO 300 J = 1, N
                  CALL DLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM )
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( MIN( M, J ), A( 1, J ), 1,
     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
  300          CONTINUE
            END IF
         ELSE
            IF( LSAME( DIAG, 'U' ) ) THEN
               SCALE = ONE
               SUM = MIN( M, N )
               SSQ( 1 ) = ONE
               SSQ( 2 ) = MIN( M, N )
               DO 310 J = 1, N
                  CALL DLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE,
     $                         SUM )
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( M-J, A( MIN( M, J+1 ), J ), 1,
     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
  310          CONTINUE
            ELSE
               SCALE = ZERO
               SUM = ONE
               SSQ( 1 ) = ZERO
               SSQ( 2 ) = ONE
               DO 320 J = 1, N
                  CALL DLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM )
                  COLSSQ( 1 ) = ZERO
                  COLSSQ( 2 ) = ONE
                  CALL DLASSQ( M-J+1, A( J, J ), 1,
     $                         COLSSQ( 1 ), COLSSQ( 2 ) )
                  CALL DCOMBSSQ( SSQ, COLSSQ )
  320          CONTINUE
            END IF
         END IF
         VALUE = SCALE*SQRT( SUM )
         VALUE = SSQ( 1 )*SQRT( SSQ( 2 ) )
      END IF
 *
      DLANTR = VALUE
--- a/lapack-netlib/SRC/dlanv2.f
+++ b/lapack-netlib/SRC/dlanv2.f
@@ -161,7 +161,6 @@
      IF( C.EQ.ZERO ) THEN
         CS = ONE
         SN = ZERO
         GO TO 10
 *
      ELSE IF( B.EQ.ZERO ) THEN
 *
@@ -174,12 +173,12 @@
         A = TEMP
         B = -C
         C = ZERO
         GO TO 10
 *
      ELSE IF( ( A-D ).EQ.ZERO .AND. SIGN( ONE, B ).NE.SIGN( ONE, C ) )
     $          THEN
         CS = ONE
         SN = ZERO
         GO TO 10
 *
      ELSE
 *
         TEMP = A - D
@@ -207,6 +206,7 @@
            SN = C / TAU
            B = B - C
            C = ZERO
 *
         ELSE
 *
 *           Complex eigenvalues, or real (almost) equal eigenvalues.
@@ -268,8 +268,6 @@
         END IF
 *
      END IF
 *
   10 CONTINUE
 *
 *     Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I).
 *
--- a/lapack-netlib/SRC/dlaorhr_col_getrfnp.f
+++ b/lapack-netlib/SRC/dlaorhr_col_getrfnp.f
@@ -0,0 +1,248 @@
 *> \brief \b DLAORHR_COL_GETRFNP
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAORHR_COL_GETRFNP + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAORHR_COL_GETRFNP( M, N, A, LDA, D, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), D( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLAORHR_COL_GETRFNP computes the modified LU factorization without
 *> pivoting of a real general M-by-N matrix A. The factorization has
 *> the form:
 *>
 *>     A - S = L * U,
 *>
 *> where:
 *>    S is a m-by-n diagonal sign matrix with the diagonal D, so that
 *>    D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
 *>    as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
 *>    i-1 steps of Gaussian elimination. This means that the diagonal
 *>    element at each step of "modified" Gaussian elimination is
 *>    at least one in absolute value (so that division-by-zero not
 *>    not possible during the division by the diagonal element);
 *>
 *>    L is a M-by-N lower triangular matrix with unit diagonal elements
 *>    (lower trapezoidal if M > N);
 *>
 *>    and U is a M-by-N upper triangular matrix
 *>    (upper trapezoidal if M < N).
 *>
 *> This routine is an auxiliary routine used in the Householder
 *> reconstruction routine DORHR_COL. In DORHR_COL, this routine is
 *> applied to an M-by-N matrix A with orthonormal columns, where each
 *> element is bounded by one in absolute value. With the choice of
 *> the matrix S above, one can show that the diagonal element at each
 *> step of Gaussian elimination is the largest (in absolute value) in
 *> the column on or below the diagonal, so that no pivoting is required
 *> for numerical stability [1].
 *>
 *> For more details on the Householder reconstruction algorithm,
 *> including the modified LU factorization, see [1].
 *>
 *> This is the blocked right-looking version of the algorithm,
 *> calling Level 3 BLAS to update the submatrix. To factorize a block,
 *> this routine calls the recursive routine DLAORHR_COL_GETRFNP2.
 *>
 *> [1] "Reconstructing Householder vectors from tall-skinny QR",
 *>     G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
 *>     E. Solomonik, J. Parallel Distrib. Comput.,
 *>     vol. 85, pp. 3-31, 2015.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the M-by-N matrix to be factored.
 *>          On exit, the factors L and U from the factorization
 *>          A-S=L*U; the unit diagonal elements of L are not stored.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension min(M,N)
 *>          The diagonal elements of the diagonal M-by-N sign matrix S,
 *>          D(i) = S(i,i), where 1 <= i <= min(M,N). The elements can
 *>          be only plus or minus one.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *> \endverbatim
 *>
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \date November 2019
 *
 *> \ingroup doubleGEcomputational
 *
 *> \par Contributors:
 *  ==================
 *>
 *> \verbatim
 *>
 *> November 2019, Igor Kozachenko,
 *>                Computer Science Division,
 *>                University of California, Berkeley
 *>
 *> \endverbatim
 *
 *  =====================================================================
      SUBROUTINE DLAORHR_COL_GETRFNP( M, N, A, LDA, D, INFO )
      IMPLICIT NONE
 *
 *  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     November 2019
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, M, N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), D( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            IINFO, J, JB, NB
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGEMM, DLAORHR_COL_GETRFNP2, DTRSM, XERBLA
 *     ..
 *     .. External Functions ..
      INTEGER            ILAENV
      EXTERNAL           ILAENV
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DLAORHR_COL_GETRFNP', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( MIN( M, N ).EQ.0 )
     $   RETURN
 *
 *     Determine the block size for this environment.
 *

      NB = ILAENV( 1, 'DLAORHR_COL_GETRFNP', ' ', M, N, -1, -1 )

      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
 *
 *        Use unblocked code.
 *
         CALL DLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
      ELSE
 *
 *        Use blocked code.
 *
         DO J = 1, MIN( M, N ), NB
            JB = MIN( MIN( M, N )-J+1, NB )
 *
 *           Factor diagonal and subdiagonal blocks.
 *
            CALL DLAORHR_COL_GETRFNP2( M-J+1, JB, A( J, J ), LDA,
     $                                 D( J ), IINFO )
 *
            IF( J+JB.LE.N ) THEN
 *
 *              Compute block row of U.
 *
               CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
     $                     N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
     $                     LDA )
               IF( J+JB.LE.M ) THEN
 *
 *                 Update trailing submatrix.
 *
                  CALL DGEMM( 'No transpose', 'No transpose', M-J-JB+1,
     $                        N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
     $                        A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
     $                        LDA )
               END IF
            END IF
         END DO
      END IF
      RETURN
 *
 *     End of DLAORHR_COL_GETRFNP
 *
      END
--- a/lapack-netlib/SRC/dlaorhr_col_getrfnp2.f
+++ b/lapack-netlib/SRC/dlaorhr_col_getrfnp2.f
@@ -0,0 +1,305 @@
 *> \brief \b DLAORHR_COL_GETRFNP2
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAORHR_GETRF2NP + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp2.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp2.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp2.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       RECURSIVE SUBROUTINE DLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            INFO, LDA, M, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   A( LDA, * ), D( * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLAORHR_COL_GETRFNP2 computes the modified LU factorization without
 *> pivoting of a real general M-by-N matrix A. The factorization has
 *> the form:
 *>
 *>     A - S = L * U,
 *>
 *> where:
 *>    S is a m-by-n diagonal sign matrix with the diagonal D, so that
 *>    D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
 *>    as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
 *>    i-1 steps of Gaussian elimination. This means that the diagonal
 *>    element at each step of "modified" Gaussian elimination is at
 *>    least one in absolute value (so that division-by-zero not
 *>    possible during the division by the diagonal element);
 *>
 *>    L is a M-by-N lower triangular matrix with unit diagonal elements
 *>    (lower trapezoidal if M > N);
 *>
 *>    and U is a M-by-N upper triangular matrix
 *>    (upper trapezoidal if M < N).
 *>
 *> This routine is an auxiliary routine used in the Householder
 *> reconstruction routine DORHR_COL. In DORHR_COL, this routine is
 *> applied to an M-by-N matrix A with orthonormal columns, where each
 *> element is bounded by one in absolute value. With the choice of
 *> the matrix S above, one can show that the diagonal element at each
 *> step of Gaussian elimination is the largest (in absolute value) in
 *> the column on or below the diagonal, so that no pivoting is required
 *> for numerical stability [1].
 *>
 *> For more details on the Householder reconstruction algorithm,
 *> including the modified LU factorization, see [1].
 *>
 *> This is the recursive version of the LU factorization algorithm.
 *> Denote A - S by B. The algorithm divides the matrix B into four
 *> submatrices:
 *>
 *>        [  B11 | B12  ]  where B11 is n1 by n1,
 *>    B = [ -----|----- ]        B21 is (m-n1) by n1,
 *>        [  B21 | B22  ]        B12 is n1 by n2,
 *>                               B22 is (m-n1) by n2,
 *>                               with n1 = min(m,n)/2, n2 = n-n1.
 *>
 *>
 *> The subroutine calls itself to factor B11, solves for B21,
 *> solves for B12, updates B22, then calls itself to factor B22.
 *>
 *> For more details on the recursive LU algorithm, see [2].
 *>
 *> DLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked
 *> routine DLAORHR_COL_GETRFNP, which uses blocked code calling
 *. Level 3 BLAS to update the submatrix. However, DLAORHR_COL_GETRFNP2
 *> is self-sufficient and can be used without DLAORHR_COL_GETRFNP.
 *>
 *> [1] "Reconstructing Householder vectors from tall-skinny QR",
 *>     G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
 *>     E. Solomonik, J. Parallel Distrib. Comput.,
 *>     vol. 85, pp. 3-31, 2015.
 *>
 *> [2] "Recursion leads to automatic variable blocking for dense linear
 *>     algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
 *>     vol. 41, no. 6, pp. 737-755, 1997.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          On entry, the M-by-N matrix to be factored.
 *>          On exit, the factors L and U from the factorization
 *>          A-S=L*U; the unit diagonal elements of L are not stored.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension min(M,N)
 *>          The diagonal elements of the diagonal M-by-N sign matrix S,
 *>          D(i) = S(i,i), where 1 <= i <= min(M,N). The elements can
 *>          be only plus or minus one.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *> \endverbatim
 *>
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \date November 2019
 *
 *> \ingroup doubleGEcomputational
 *
 *> \par Contributors:
 *  ==================
 *>
 *> \verbatim
 *>
 *> November 2019, Igor Kozachenko,
 *>                Computer Science Division,
 *>                University of California, Berkeley
 *>
 *> \endverbatim
 *
 *  =====================================================================
      RECURSIVE SUBROUTINE DLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
      IMPLICIT NONE
 *
 *  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     November 2019
 *
 *     .. Scalar Arguments ..
      INTEGER            INFO, LDA, M, N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), D( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
 *     ..
 *     .. Local Scalars ..
      DOUBLE PRECISION   SFMIN
      INTEGER            I, IINFO, N1, N2
 *     ..
 *     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
      EXTERNAL           DLAMCH
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGEMM, DSCAL, DTRSM, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          ABS, DSIGN, MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters
 *
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DLAORHR_COL_GETRFNP2', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( MIN( M, N ).EQ.0 )
     $   RETURN

      IF ( M.EQ.1 ) THEN
 *
 *        One row case, (also recursion termination case),
 *        use unblocked code
 *
 *        Transfer the sign
 *
         D( 1 ) = -DSIGN( ONE, A( 1, 1 ) )
 *
 *        Construct the row of U
 *
         A( 1, 1 ) = A( 1, 1 ) - D( 1 )
 *
      ELSE IF( N.EQ.1 ) THEN
 *
 *        One column case, (also recursion termination case),
 *        use unblocked code
 *
 *        Transfer the sign
 *
         D( 1 ) = -DSIGN( ONE, A( 1, 1 ) )
 *
 *        Construct the row of U
 *
         A( 1, 1 ) = A( 1, 1 ) - D( 1 )
 *
 *        Scale the elements 2:M of the column
 *
 *        Determine machine safe minimum
 *
         SFMIN = DLAMCH('S')
 *
 *        Construct the subdiagonal elements of L
 *
         IF( ABS( A( 1, 1 ) ) .GE. SFMIN ) THEN
            CALL DSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 )
         ELSE
            DO I = 2, M
               A( I, 1 ) = A( I, 1 ) / A( 1, 1 )
            END DO
         END IF
 *
      ELSE
 *
 *        Divide the matrix B into four submatrices
 *
         N1 = MIN( M, N ) / 2
         N2 = N-N1

 *
 *        Factor B11, recursive call
 *
         CALL DLAORHR_COL_GETRFNP2( N1, N1, A, LDA, D, IINFO )
 *
 *        Solve for B21
 *
         CALL DTRSM( 'R', 'U', 'N', 'N', M-N1, N1, ONE, A, LDA,
     $               A( N1+1, 1 ), LDA )
 *
 *        Solve for B12
 *
         CALL DTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA,
     $               A( 1, N1+1 ), LDA )
 *
 *        Update B22, i.e. compute the Schur complement
 *        B22 := B22 - B21*B12
 *
         CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA,
     $               A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA )
 *
 *        Factor B22, recursive call
 *
         CALL DLAORHR_COL_GETRFNP2( M-N1, N2, A( N1+1, N1+1 ), LDA,
     $                              D( N1+1 ), IINFO )
 *
      END IF
      RETURN
 *
 *     End of DLAORHR_COL_GETRFNP2
 *
      END
--- a/lapack-netlib/SRC/dlaqps.f
+++ b/lapack-netlib/SRC/dlaqps.f
@@ -127,7 +127,7 @@
 *> \param[in,out] AUXV
 *> \verbatim
 *>          AUXV is DOUBLE PRECISION array, dimension (NB)
 *>          Auxiliar vector.
 *>          Auxiliary vector.
 *> \endverbatim
 *>
 *> \param[in,out] F
--- a/lapack-netlib/SRC/dlaqr0.f
+++ b/lapack-netlib/SRC/dlaqr0.f
@@ -67,7 +67,7 @@
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>           The order of the matrix H.  N .GE. 0.
 *>           The order of the matrix H.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] ILO
@@ -79,12 +79,12 @@
 *> \verbatim
 *>          IHI is INTEGER
 *>           It is assumed that H is already upper triangular in rows
 *>           and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1,
 *>           and columns 1:ILO-1 and IHI+1:N and, if ILO > 1,
 *>           H(ILO,ILO-1) is zero. ILO and IHI are normally set by a
 *>           previous call to DGEBAL, and then passed to DGEHRD when the
 *>           matrix output by DGEBAL is reduced to Hessenberg form.
 *>           Otherwise, ILO and IHI should be set to 1 and N,
 *>           respectively.  If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N.
 *>           respectively.  If N > 0, then 1 <= ILO <= IHI <= N.
 *>           If N = 0, then ILO = 1 and IHI = 0.
 *> \endverbatim
 *>
@@ -97,19 +97,19 @@
 *>           decomposition (the Schur form); 2-by-2 diagonal blocks
 *>           (corresponding to complex conjugate pairs of eigenvalues)
 *>           are returned in standard form, with H(i,i) = H(i+1,i+1)
 *>           and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is
 *>           and H(i+1,i)*H(i,i+1) < 0. If INFO = 0 and WANTT is
 *>           .FALSE., then the contents of H are unspecified on exit.
 *>           (The output value of H when INFO.GT.0 is given under the
 *>           (The output value of H when INFO > 0 is given under the
 *>           description of INFO below.)
 *>
 *>           This subroutine may explicitly set H(i,j) = 0 for i.GT.j and
 *>           This subroutine may explicitly set H(i,j) = 0 for i > j and
 *>           j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N.
 *> \endverbatim
 *>
 *> \param[in] LDH
 *> \verbatim
 *>          LDH is INTEGER
 *>           The leading dimension of the array H. LDH .GE. max(1,N).
 *>           The leading dimension of the array H. LDH >= max(1,N).
 *> \endverbatim
 *>
 *> \param[out] WR
@@ -125,7 +125,7 @@
 *>           and WI(ILO:IHI). If two eigenvalues are computed as a
 *>           complex conjugate pair, they are stored in consecutive
 *>           elements of WR and WI, say the i-th and (i+1)th, with
 *>           WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then
 *>           WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., then
 *>           the eigenvalues are stored in the same order as on the
 *>           diagonal of the Schur form returned in H, with
 *>           WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal
@@ -143,7 +143,7 @@
 *>          IHIZ is INTEGER
 *>           Specify the rows of Z to which transformations must be
 *>           applied if WANTZ is .TRUE..
 *>           1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N.
 *>           1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
 *> \endverbatim
 *>
 *> \param[in,out] Z
@@ -153,7 +153,7 @@
 *>           If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is
 *>           replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the
 *>           orthogonal Schur factor of H(ILO:IHI,ILO:IHI).
 *>           (The output value of Z when INFO.GT.0 is given under
 *>           (The output value of Z when INFO > 0 is given under
 *>           the description of INFO below.)
 *> \endverbatim
 *>
@@ -161,7 +161,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>           The leading dimension of the array Z.  if WANTZ is .TRUE.
 *>           then LDZ.GE.MAX(1,IHIZ).  Otherwize, LDZ.GE.1.
 *>           then LDZ >= MAX(1,IHIZ).  Otherwise, LDZ >= 1.
 *> \endverbatim
 *>
 *> \param[out] WORK
@@ -174,7 +174,7 @@
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
 *>           The dimension of the array WORK.  LWORK .GE. max(1,N)
 *>           The dimension of the array WORK.  LWORK >= max(1,N)
 *>           is sufficient, but LWORK typically as large as 6*N may
 *>           be required for optimal performance.  A workspace query
 *>           to determine the optimal workspace size is recommended.
@@ -190,19 +190,19 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>             =  0:  successful exit
 *>           .GT. 0:  if INFO = i, DLAQR0 failed to compute all of
 *>             = 0:  successful exit
 *>             > 0:  if INFO = i, DLAQR0 failed to compute all of
 *>                the eigenvalues.  Elements 1:ilo-1 and i+1:n of WR
 *>                and WI contain those eigenvalues which have been
 *>                successfully computed.  (Failures are rare.)
 *>
 *>                If INFO .GT. 0 and WANT is .FALSE., then on exit,
 *>                If INFO > 0 and WANT is .FALSE., then on exit,
 *>                the remaining unconverged eigenvalues are the eigen-
 *>                values of the upper Hessenberg matrix rows and
 *>                columns ILO through INFO of the final, output
 *>                value of H.
 *>
 *>                If INFO .GT. 0 and WANTT is .TRUE., then on exit
 *>                If INFO > 0 and WANTT is .TRUE., then on exit
 *>
 *>           (*)  (initial value of H)*U  = U*(final value of H)
 *>
@@ -210,7 +210,7 @@
 *>                value of H is upper Hessenberg and quasi-triangular
 *>                in rows and columns INFO+1 through IHI.
 *>
 *>                If INFO .GT. 0 and WANTZ is .TRUE., then on exit
 *>                If INFO > 0 and WANTZ is .TRUE., then on exit
 *>
 *>                  (final value of Z(ILO:IHI,ILOZ:IHIZ)
 *>                   =  (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U
@@ -218,7 +218,7 @@
 *>                where U is the orthogonal matrix in (*) (regard-
 *>                less of the value of WANTT.)
 *>
 *>                If INFO .GT. 0 and WANTZ is .FALSE., then Z is not
 *>                If INFO > 0 and WANTZ is .FALSE., then Z is not
 *>                accessed.
 *> \endverbatim
 *
@@ -678,7 +678,7 @@
                  END IF
               END IF
 *
 *              ==== Use up to NS of the the smallest magnatiude
 *              ==== Use up to NS of the the smallest magnitude
 *              .    shifts.  If there aren't NS shifts available,
 *              .    then use them all, possibly dropping one to
 *              .    make the number of shifts even. ====
--- a/lapack-netlib/SRC/dlaqr1.f
+++ b/lapack-netlib/SRC/dlaqr1.f
@@ -69,7 +69,7 @@
 *> \verbatim
 *>          LDH is INTEGER
 *>              The leading dimension of H as declared in
 *>              the calling procedure.  LDH.GE.N
 *>              the calling procedure.  LDH >= N
 *> \endverbatim
 *>
 *> \param[in] SR1
--- a/lapack-netlib/SRC/dlaqr2.f
+++ b/lapack-netlib/SRC/dlaqr2.f
@@ -103,7 +103,7 @@
 *> \param[in] NW
 *> \verbatim
 *>          NW is INTEGER
 *>          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
 *>          Deflation window size.  1 <= NW <= (KBOT-KTOP+1).
 *> \endverbatim
 *>
 *> \param[in,out] H
@@ -121,7 +121,7 @@
 *> \verbatim
 *>          LDH is INTEGER
 *>          Leading dimension of H just as declared in the calling
 *>          subroutine.  N .LE. LDH
 *>          subroutine.  N <= LDH
 *> \endverbatim
 *>
 *> \param[in] ILOZ
@@ -133,7 +133,7 @@
 *> \verbatim
 *>          IHIZ is INTEGER
 *>          Specify the rows of Z to which transformations must be
 *>          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
 *>          applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N.
 *> \endverbatim
 *>
 *> \param[in,out] Z
@@ -149,7 +149,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>          The leading dimension of Z just as declared in the
 *>          calling subroutine.  1 .LE. LDZ.
 *>          calling subroutine.  1 <= LDZ.
 *> \endverbatim
 *>
 *> \param[out] NS
@@ -194,13 +194,13 @@
 *> \verbatim
 *>          LDV is INTEGER
 *>          The leading dimension of V just as declared in the
 *>          calling subroutine.  NW .LE. LDV
 *>          calling subroutine.  NW <= LDV
 *> \endverbatim
 *>
 *> \param[in] NH
 *> \verbatim
 *>          NH is INTEGER
 *>          The number of columns of T.  NH.GE.NW.
 *>          The number of columns of T.  NH >= NW.
 *> \endverbatim
 *>
 *> \param[out] T
@@ -212,14 +212,14 @@
 *> \verbatim
 *>          LDT is INTEGER
 *>          The leading dimension of T just as declared in the
 *>          calling subroutine.  NW .LE. LDT
 *>          calling subroutine.  NW <= LDT
 *> \endverbatim
 *>
 *> \param[in] NV
 *> \verbatim
 *>          NV is INTEGER
 *>          The number of rows of work array WV available for
 *>          workspace.  NV.GE.NW.
 *>          workspace.  NV >= NW.
 *> \endverbatim
 *>
 *> \param[out] WV
@@ -231,7 +231,7 @@
 *> \verbatim
 *>          LDWV is INTEGER
 *>          The leading dimension of W just as declared in the
 *>          calling subroutine.  NW .LE. LDV
 *>          calling subroutine.  NW <= LDV
 *> \endverbatim
 *>
 *> \param[out] WORK
--- a/lapack-netlib/SRC/dlaqr3.f
+++ b/lapack-netlib/SRC/dlaqr3.f
@@ -100,7 +100,7 @@
 *> \param[in] NW
 *> \verbatim
 *>          NW is INTEGER
 *>          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
 *>          Deflation window size.  1 <= NW <= (KBOT-KTOP+1).
 *> \endverbatim
 *>
 *> \param[in,out] H
@@ -118,7 +118,7 @@
 *> \verbatim
 *>          LDH is INTEGER
 *>          Leading dimension of H just as declared in the calling
 *>          subroutine.  N .LE. LDH
 *>          subroutine.  N <= LDH
 *> \endverbatim
 *>
 *> \param[in] ILOZ
@@ -130,7 +130,7 @@
 *> \verbatim
 *>          IHIZ is INTEGER
 *>          Specify the rows of Z to which transformations must be
 *>          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
 *>          applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N.
 *> \endverbatim
 *>
 *> \param[in,out] Z
@@ -146,7 +146,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>          The leading dimension of Z just as declared in the
 *>          calling subroutine.  1 .LE. LDZ.
 *>          calling subroutine.  1 <= LDZ.
 *> \endverbatim
 *>
 *> \param[out] NS
@@ -191,13 +191,13 @@
 *> \verbatim
 *>          LDV is INTEGER
 *>          The leading dimension of V just as declared in the
 *>          calling subroutine.  NW .LE. LDV
 *>          calling subroutine.  NW <= LDV
 *> \endverbatim
 *>
 *> \param[in] NH
 *> \verbatim
 *>          NH is INTEGER
 *>          The number of columns of T.  NH.GE.NW.
 *>          The number of columns of T.  NH >= NW.
 *> \endverbatim
 *>
 *> \param[out] T
@@ -209,14 +209,14 @@
 *> \verbatim
 *>          LDT is INTEGER
 *>          The leading dimension of T just as declared in the
 *>          calling subroutine.  NW .LE. LDT
 *>          calling subroutine.  NW <= LDT
 *> \endverbatim
 *>
 *> \param[in] NV
 *> \verbatim
 *>          NV is INTEGER
 *>          The number of rows of work array WV available for
 *>          workspace.  NV.GE.NW.
 *>          workspace.  NV >= NW.
 *> \endverbatim
 *>
 *> \param[out] WV
@@ -228,7 +228,7 @@
 *> \verbatim
 *>          LDWV is INTEGER
 *>          The leading dimension of W just as declared in the
 *>          calling subroutine.  NW .LE. LDV
 *>          calling subroutine.  NW <= LDV
 *> \endverbatim
 *>
 *> \param[out] WORK
--- a/lapack-netlib/SRC/dlaqr4.f
+++ b/lapack-netlib/SRC/dlaqr4.f
@@ -74,7 +74,7 @@
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>           The order of the matrix H.  N .GE. 0.
 *>           The order of the matrix H.  N >= 0.
 *> \endverbatim
 *>
 *> \param[in] ILO
@@ -86,12 +86,12 @@
 *> \verbatim
 *>          IHI is INTEGER
 *>           It is assumed that H is already upper triangular in rows
 *>           and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1,
 *>           and columns 1:ILO-1 and IHI+1:N and, if ILO > 1,
 *>           H(ILO,ILO-1) is zero. ILO and IHI are normally set by a
 *>           previous call to DGEBAL, and then passed to DGEHRD when the
 *>           matrix output by DGEBAL is reduced to Hessenberg form.
 *>           Otherwise, ILO and IHI should be set to 1 and N,
 *>           respectively.  If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N.
 *>           respectively.  If N > 0, then 1 <= ILO <= IHI <= N.
 *>           If N = 0, then ILO = 1 and IHI = 0.
 *> \endverbatim
 *>
@@ -104,19 +104,19 @@
 *>           decomposition (the Schur form); 2-by-2 diagonal blocks
 *>           (corresponding to complex conjugate pairs of eigenvalues)
 *>           are returned in standard form, with H(i,i) = H(i+1,i+1)
 *>           and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is
 *>           and H(i+1,i)*H(i,i+1) < 0. If INFO = 0 and WANTT is
 *>           .FALSE., then the contents of H are unspecified on exit.
 *>           (The output value of H when INFO.GT.0 is given under the
 *>           (The output value of H when INFO > 0 is given under the
 *>           description of INFO below.)
 *>
 *>           This subroutine may explicitly set H(i,j) = 0 for i.GT.j and
 *>           This subroutine may explicitly set H(i,j) = 0 for i > j and
 *>           j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N.
 *> \endverbatim
 *>
 *> \param[in] LDH
 *> \verbatim
 *>          LDH is INTEGER
 *>           The leading dimension of the array H. LDH .GE. max(1,N).
 *>           The leading dimension of the array H. LDH >= max(1,N).
 *> \endverbatim
 *>
 *> \param[out] WR
@@ -132,7 +132,7 @@
 *>           and WI(ILO:IHI). If two eigenvalues are computed as a
 *>           complex conjugate pair, they are stored in consecutive
 *>           elements of WR and WI, say the i-th and (i+1)th, with
 *>           WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then
 *>           WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., then
 *>           the eigenvalues are stored in the same order as on the
 *>           diagonal of the Schur form returned in H, with
 *>           WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal
@@ -150,7 +150,7 @@
 *>          IHIZ is INTEGER
 *>           Specify the rows of Z to which transformations must be
 *>           applied if WANTZ is .TRUE..
 *>           1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N.
 *>           1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
 *> \endverbatim
 *>
 *> \param[in,out] Z
@@ -160,7 +160,7 @@
 *>           If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is
 *>           replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the
 *>           orthogonal Schur factor of H(ILO:IHI,ILO:IHI).
 *>           (The output value of Z when INFO.GT.0 is given under
 *>           (The output value of Z when INFO > 0 is given under
 *>           the description of INFO below.)
 *> \endverbatim
 *>
@@ -168,7 +168,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>           The leading dimension of the array Z.  if WANTZ is .TRUE.
 *>           then LDZ.GE.MAX(1,IHIZ).  Otherwize, LDZ.GE.1.
 *>           then LDZ >= MAX(1,IHIZ).  Otherwise, LDZ >= 1.
 *> \endverbatim
 *>
 *> \param[out] WORK
@@ -181,7 +181,7 @@
 *> \param[in] LWORK
 *> \verbatim
 *>          LWORK is INTEGER
 *>           The dimension of the array WORK.  LWORK .GE. max(1,N)
 *>           The dimension of the array WORK.  LWORK >= max(1,N)
 *>           is sufficient, but LWORK typically as large as 6*N may
 *>           be required for optimal performance.  A workspace query
 *>           to determine the optimal workspace size is recommended.
@@ -197,19 +197,19 @@
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>             =  0:  successful exit
 *>           .GT. 0:  if INFO = i, DLAQR4 failed to compute all of
 *>             = 0:  successful exit
 *>             > 0:  if INFO = i, DLAQR4 failed to compute all of
 *>                the eigenvalues.  Elements 1:ilo-1 and i+1:n of WR
 *>                and WI contain those eigenvalues which have been
 *>                successfully computed.  (Failures are rare.)
 *>
 *>                If INFO .GT. 0 and WANT is .FALSE., then on exit,
 *>                If INFO > 0 and WANT is .FALSE., then on exit,
 *>                the remaining unconverged eigenvalues are the eigen-
 *>                values of the upper Hessenberg matrix rows and
 *>                columns ILO through INFO of the final, output
 *>                value of H.
 *>
 *>                If INFO .GT. 0 and WANTT is .TRUE., then on exit
 *>                If INFO > 0 and WANTT is .TRUE., then on exit
 *>
 *>           (*)  (initial value of H)*U  = U*(final value of H)
 *>
@@ -217,7 +217,7 @@
 *>                value of  H is upper Hessenberg and triangular in
 *>                rows and columns INFO+1 through IHI.
 *>
 *>                If INFO .GT. 0 and WANTZ is .TRUE., then on exit
 *>                If INFO > 0 and WANTZ is .TRUE., then on exit
 *>
 *>                  (final value of Z(ILO:IHI,ILOZ:IHIZ)
 *>                   =  (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U
@@ -225,7 +225,7 @@
 *>                where U is the orthogonal matrix in (*) (regard-
 *>                less of the value of WANTT.)
 *>
 *>                If INFO .GT. 0 and WANTZ is .FALSE., then Z is not
 *>                If INFO > 0 and WANTZ is .FALSE., then Z is not
 *>                accessed.
 *> \endverbatim
 *
@@ -677,7 +677,7 @@
                  END IF
               END IF
 *
 *              ==== Use up to NS of the the smallest magnatiude
 *              ==== Use up to NS of the the smallest magnitude
 *              .    shifts.  If there aren't NS shifts available,
 *              .    then use them all, possibly dropping one to
 *              .    make the number of shifts even. ====
--- a/lapack-netlib/SRC/dlaqr5.f
+++ b/lapack-netlib/SRC/dlaqr5.f
@@ -133,7 +133,7 @@
 *> \verbatim
 *>          LDH is INTEGER
 *>             LDH is the leading dimension of H just as declared in the
 *>             calling procedure.  LDH.GE.MAX(1,N).
 *>             calling procedure.  LDH >= MAX(1,N).
 *> \endverbatim
 *>
 *> \param[in] ILOZ
@@ -145,7 +145,7 @@
 *> \verbatim
 *>          IHIZ is INTEGER
 *>             Specify the rows of Z to which transformations must be
 *>             applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
 *>             applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
 *> \endverbatim
 *>
 *> \param[in,out] Z
@@ -161,7 +161,7 @@
 *> \verbatim
 *>          LDZ is INTEGER
 *>             LDA is the leading dimension of Z just as declared in
 *>             the calling procedure. LDZ.GE.N.
 *>             the calling procedure. LDZ >= N.
 *> \endverbatim
 *>
 *> \param[out] V
@@ -173,7 +173,7 @@
 *> \verbatim
 *>          LDV is INTEGER
 *>             LDV is the leading dimension of V as declared in the
 *>             calling procedure.  LDV.GE.3.
 *>             calling procedure.  LDV >= 3.
 *> \endverbatim
 *>
 *> \param[out] U
@@ -185,33 +185,14 @@
 *> \verbatim
 *>          LDU is INTEGER
 *>             LDU is the leading dimension of U just as declared in the
 *>             in the calling subroutine.  LDU.GE.3*NSHFTS-3.
 *> \endverbatim
 *>
 *> \param[in] NH
 *> \verbatim
 *>          NH is INTEGER
 *>             NH is the number of columns in array WH available for
 *>             workspace. NH.GE.1.
 *> \endverbatim
 *>
 *> \param[out] WH
 *> \verbatim
 *>          WH is DOUBLE PRECISION array, dimension (LDWH,NH)
 *> \endverbatim
 *>
 *> \param[in] LDWH
 *> \verbatim
 *>          LDWH is INTEGER
 *>             Leading dimension of WH just as declared in the
 *>             calling procedure.  LDWH.GE.3*NSHFTS-3.
 *>             in the calling subroutine.  LDU >= 3*NSHFTS-3.
 *> \endverbatim
 *>
 *> \param[in] NV
 *> \verbatim
 *>          NV is INTEGER
 *>             NV is the number of rows in WV agailable for workspace.
 *>             NV.GE.1.
 *>             NV >= 1.
 *> \endverbatim
 *>
 *> \param[out] WV
@@ -223,9 +204,28 @@
 *> \verbatim
 *>          LDWV is INTEGER
 *>             LDWV is the leading dimension of WV as declared in the
 *>             in the calling subroutine.  LDWV.GE.NV.
 *>             in the calling subroutine.  LDWV >= NV.
 *> \endverbatim
 *
 *> \param[in] NH
 *> \verbatim
 *>          NH is INTEGER
 *>             NH is the number of columns in array WH available for
 *>             workspace. NH >= 1.
 *> \endverbatim
 *>
 *> \param[out] WH
 *> \verbatim
 *>          WH is DOUBLE PRECISION array, dimension (LDWH,NH)
 *> \endverbatim
 *>
 *> \param[in] LDWH
 *> \verbatim
 *>          LDWH is INTEGER
 *>             Leading dimension of WH just as declared in the
 *>             calling procedure.  LDWH >= 3*NSHFTS-3.
 *> \endverbatim
 *>
 *  Authors:
 *  ========
 *
--- a/lapack-netlib/SRC/dlarfb.f
+++ b/lapack-netlib/SRC/dlarfb.f
@@ -92,6 +92,8 @@
 *>          K is INTEGER
 *>          The order of the matrix T (= the number of elementary
 *>          reflectors whose product defines the block reflector).
 *>          If SIDE = 'L', M >= K >= 0;
 *>          if SIDE = 'R', N >= K >= 0.
 *> \endverbatim
 *>
 *> \param[in] V
--- a/lapack-netlib/SRC/dlarfx.f
+++ b/lapack-netlib/SRC/dlarfx.f
@@ -94,7 +94,7 @@
 *> \param[in] LDC
 *> \verbatim
 *>          LDC is INTEGER
 *>          The leading dimension of the array C. LDA >= (1,M).
 *>          The leading dimension of the array C. LDC >= (1,M).
 *> \endverbatim
 *>
 *> \param[out] WORK
--- a/lapack-netlib/SRC/dlarfy.f
+++ b/lapack-netlib/SRC/dlarfy.f
@@ -103,7 +103,7 @@
 *
 *> \date December 2016
 *
 *> \ingroup double_eig
 *> \ingroup doubleOTHERauxiliary
 *
 *  =====================================================================
      SUBROUTINE DLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK )
--- a/lapack-netlib/SRC/dlarrb.f
+++ b/lapack-netlib/SRC/dlarrb.f
@@ -91,7 +91,7 @@
 *>          RTOL2 is DOUBLE PRECISION
 *>          Tolerance for the convergence of the bisection intervals.
 *>          An interval [LEFT,RIGHT] has converged if
 *>          RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
 *>          RIGHT-LEFT < MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
 *>          where GAP is the (estimated) distance to the nearest
 *>          eigenvalue.
 *> \endverbatim
@@ -117,7 +117,7 @@
 *>          WGAP is DOUBLE PRECISION array, dimension (N-1)
 *>          On input, the (estimated) gaps between consecutive
 *>          eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between
 *>          eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST
 *>          eigenvalues I and I+1. Note that if IFIRST = ILAST
 *>          then WGAP(IFIRST-OFFSET) must be set to ZERO.
 *>          On output, these gaps are refined.
 *> \endverbatim
--- a/lapack-netlib/SRC/dlarre.f
+++ b/lapack-netlib/SRC/dlarre.f
@@ -150,7 +150,7 @@
 *>          RTOL2 is DOUBLE PRECISION
 *>           Parameters for bisection.
 *>           An interval [LEFT,RIGHT] has converged if
 *>           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
 *>           RIGHT-LEFT < MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
 *> \endverbatim
 *>
 *> \param[in] SPLTOL
--- a/lapack-netlib/SRC/dlarrj.f
+++ b/lapack-netlib/SRC/dlarrj.f
@@ -85,7 +85,7 @@
 *>          RTOL is DOUBLE PRECISION
 *>          Tolerance for the convergence of the bisection intervals.
 *>          An interval [LEFT,RIGHT] has converged if
 *>          RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|).
 *>          RIGHT-LEFT < RTOL*MAX(|LEFT|,|RIGHT|).
 *> \endverbatim
 *>
 *> \param[in] OFFSET
--- a/lapack-netlib/SRC/dlarrv.f
+++ b/lapack-netlib/SRC/dlarrv.f
@@ -149,7 +149,7 @@
 *>          RTOL2 is DOUBLE PRECISION
 *>           Parameters for bisection.
 *>           An interval [LEFT,RIGHT] has converged if
 *>           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
 *>           RIGHT-LEFT < MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
 *> \endverbatim
 *>
 *> \param[in,out] W
--- a/lapack-netlib/SRC/dlasd7.f
+++ b/lapack-netlib/SRC/dlasd7.f
@@ -400,7 +400,7 @@
         VL( I ) = VLW( IDXI )
   50 CONTINUE
 *
 *     Calculate the allowable deflation tolerence
 *     Calculate the allowable deflation tolerance
 *
      EPS = DLAMCH( 'Epsilon' )
      TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
--- a/lapack-netlib/SRC/dlasr.f
+++ b/lapack-netlib/SRC/dlasr.f
@@ -175,7 +175,7 @@
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>          The M-by-N matrix A.  On exit, A is overwritten by P*A if
 *>          SIDE = 'R' or by A*P**T if SIDE = 'L'.
 *>          SIDE = 'L' or by A*P**T if SIDE = 'R'.
 *> \endverbatim
 *>
 *> \param[in] LDA
--- a/lapack-netlib/SRC/dlassq.f
+++ b/lapack-netlib/SRC/dlassq.f
@@ -60,7 +60,7 @@
 *>
 *> \param[in] X
 *> \verbatim
 *>          X is DOUBLE PRECISION array, dimension (N)
 *>          X is DOUBLE PRECISION array, dimension (1+(N-1)*INCX)
 *>          The vector for which a scaled sum of squares is computed.
 *>             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
 *> \endverbatim
--- a/lapack-netlib/SRC/dlaswlq.f
+++ b/lapack-netlib/SRC/dlaswlq.f
@@ -1,3 +1,4 @@
 *> \brief \b DLASWLQ
 *
 *  Definition:
 *  ===========
@@ -18,9 +19,20 @@
 *>
 *> \verbatim
 *>
 *>          DLASWLQ computes a blocked Short-Wide LQ factorization of a
 *>          M-by-N matrix A, where N >= M:
 *>          A = L * Q
 *> DLASWLQ computes a blocked Tall-Skinny LQ factorization of
 *> a real M-by-N matrix A for M <= N:
 *>
 *>    A = ( L 0 ) *  Q,
 *>
 *> where:
 *>
 *>    Q is a n-by-N orthogonal matrix, stored on exit in an implicit
 *>    form in the elements above the digonal of the array A and in
 *>    the elemenst of the array T;
 *>    L is an lower-triangular M-by-M matrix stored on exit in
 *>    the elements on and below the diagonal of the array A.
 *>    0 is a M-by-(N-M) zero matrix, if M < N, and is not stored.
 *>
 *> \endverbatim
 *
 *  Arguments:
@@ -150,7 +162,7 @@
      SUBROUTINE DLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK,
     $                  INFO)
 *
 *  -- LAPACK computational routine (version 3.7.1) --
 *  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
 *     June 2017
--- a/lapack-netlib/SRC/dlasyf_aa.f
+++ b/lapack-netlib/SRC/dlasyf_aa.f
@@ -284,8 +284,9 @@
 *
 *              Swap A(I1, I2+1:M) with A(I2, I2+1:M)
 *
               CALL DSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA,
     $                           A( J1+I2-1, I2+1 ), LDA )
               IF( I2.LT.M )
     $            CALL DSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA,
     $                              A( J1+I2-1, I2+1 ), LDA )
 *
 *              Swap A(I1, I1) with A(I2,I2)
 *
@@ -325,13 +326,15 @@
 *           Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1),
 *            where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1)
 *
            IF( A( K, J+1 ).NE.ZERO ) THEN
               ALPHA = ONE / A( K, J+1 )
               CALL DCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA )
               CALL DSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA )
            ELSE
               CALL DLASET( 'Full', 1, M-J-1, ZERO, ZERO,
     $                      A( K, J+2 ), LDA)
            IF( J.LT.(M-1) ) THEN
               IF( A( K, J+1 ).NE.ZERO ) THEN
                  ALPHA = ONE / A( K, J+1 )
                  CALL DCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA )
                  CALL DSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA )
               ELSE
                  CALL DLASET( 'Full', 1, M-J-1, ZERO, ZERO,
     $                         A( K, J+2 ), LDA)
               END IF
            END IF
         END IF
         J = J + 1
@@ -432,8 +435,9 @@
 *
 *              Swap A(I2+1:M, I1) with A(I2+1:M, I2)
 *
               CALL DSWAP( M-I2, A( I2+1, J1+I1-1 ), 1,
     $                           A( I2+1, J1+I2-1 ), 1 )
               IF( I2.LT.M )
     $            CALL DSWAP( M-I2, A( I2+1, J1+I1-1 ), 1,
     $                              A( I2+1, J1+I2-1 ), 1 )
 *
 *              Swap A(I1, I1) with A(I2, I2)
 *
@@ -473,13 +477,15 @@
 *           Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1),
 *            where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1)
 *
            IF( A( J+1, K ).NE.ZERO ) THEN
               ALPHA = ONE / A( J+1, K )
               CALL DCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 )
               CALL DSCAL( M-J-1, ALPHA, A( J+2, K ), 1 )
            ELSE
               CALL DLASET( 'Full', M-J-1, 1, ZERO, ZERO,
     $                      A( J+2, K ), LDA )
            IF( J.LT.(M-1) ) THEN
               IF( A( J+1, K ).NE.ZERO ) THEN
                  ALPHA = ONE / A( J+1, K )
                  CALL DCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 )
                  CALL DSCAL( M-J-1, ALPHA, A( J+2, K ), 1 )
               ELSE
                  CALL DLASET( 'Full', M-J-1, 1, ZERO, ZERO,
     $                         A( J+2, K ), LDA )
               END IF
            END IF
         END IF
         J = J + 1
--- a/lapack-netlib/SRC/dlasyf_rk.f
+++ b/lapack-netlib/SRC/dlasyf_rk.f
@@ -321,7 +321,7 @@
 *        of A and working backwards, and compute the matrix W = U12*D
 *        for use in updating A11
 *
 *        Initilize the first entry of array E, where superdiagonal
 *        Initialize the first entry of array E, where superdiagonal
 *        elements of D are stored
 *
         E( 1 ) = ZERO
@@ -649,7 +649,7 @@
 *        of A and working forwards, and compute the matrix W = L21*D
 *        for use in updating A22
 *
 *        Initilize the unused last entry of the subdiagonal array E.
 *        Initialize the unused last entry of the subdiagonal array E.
 *
         E( N ) = ZERO
 *
--- a/lapack-netlib/SRC/dlasyf_rook.f
+++ b/lapack-netlib/SRC/dlasyf_rook.f
@@ -21,7 +21,7 @@
 *       SUBROUTINE DLASYF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
 *
 *       .. Scalar Arguments ..
 *       CHARADLATER          UPLO
 *       CHARACTER          UPLO
 *       INTEGER            INFO, KB, LDA, LDW, N, NB
 *       ..
 *       .. Array Arguments ..
--- a/lapack-netlib/SRC/dlatdf.f
+++ b/lapack-netlib/SRC/dlatdf.f
@@ -85,7 +85,7 @@
 *>          RHS is DOUBLE PRECISION array, dimension (N)
 *>          On entry, RHS contains contributions from other subsystems.
 *>          On exit, RHS contains the solution of the subsystem with
 *>          entries acoording to the value of IJOB (see above).
 *>          entries according to the value of IJOB (see above).
 *> \endverbatim
 *>
 *> \param[in,out] RDSUM
@@ -260,7 +260,7 @@
 *
 *        Solve for U-part, look-ahead for RHS(N) = +-1. This is not done
 *        in BSOLVE and will hopefully give us a better estimate because
 *        any ill-conditioning of the original matrix is transfered to U
 *        any ill-conditioning of the original matrix is transferred to U
 *        and not to L. U(N, N) is an approximation to sigma_min(LU).
 *
         CALL DCOPY( N-1, RHS, 1, XP, 1 )
--- a/lapack-netlib/SRC/dlatsqr.f
+++ b/lapack-netlib/SRC/dlatsqr.f
@@ -1,3 +1,4 @@
 *> \brief \b DLATSQR
 *
 *  Definition:
 *  ===========
@@ -19,8 +20,22 @@
 *> \verbatim
 *>
 *> DLATSQR computes a blocked Tall-Skinny QR factorization of
 *> an M-by-N matrix A, where M >= N:
 *> A = Q * R .
 *> a real M-by-N matrix A for M >= N:
 *>
 *>    A = Q * ( R ),
 *>            ( 0 )
 *>
 *> where:
 *>
 *>    Q is a M-by-M orthogonal matrix, stored on exit in an implicit
 *>    form in the elements below the digonal of the array A and in
 *>    the elemenst of the array T;
 *>
 *>    R is an upper-triangular N-by-N matrix, stored on exit in
 *>    the elements on and above the diagonal of the array A.
 *>
 *>    0 is a (M-N)-by-N zero matrix, and is not stored.
 *>
 *> \endverbatim
 *
 *  Arguments:
@@ -149,10 +164,10 @@
      SUBROUTINE DLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK,
     $                    LWORK, INFO)
 *
 *  -- LAPACK computational routine (version 3.7.0) --
 *  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
 *     December 2016
 *     November 2019
 *
 *     .. Scalar Arguments ..
      INTEGER           INFO, LDA, M, N, MB, NB, LDT, LWORK
--- a/lapack-netlib/SRC/dorgtsqr.f
+++ b/lapack-netlib/SRC/dorgtsqr.f
@@ -0,0 +1,306 @@
 *> \brief \b DORGTSQR
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DORGTSQR + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgtsqr.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgtsqr.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgtsqr.f">
 *> [TXT]</a>
 *>
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORGTSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK,
 *      $                     INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER           INFO, LDA, LDT, LWORK, M, N, MB, NB
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION  A( LDA, * ), T( LDT, * ), WORK( * )
 *       ..
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DORGTSQR generates an M-by-N real matrix Q_out with orthonormal columns,
 *> which are the first N columns of a product of real orthogonal
 *> matrices of order M which are returned by DLATSQR
 *>
 *>      Q_out = first_N_columns_of( Q(1)_in * Q(2)_in * ... * Q(k)_in ).
 *>
 *> See the documentation for DLATSQR.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A.  M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A. M >= N >= 0.
 *> \endverbatim
 *>
 *> \param[in] MB
 *> \verbatim
 *>          MB is INTEGER
 *>          The row block size used by DLATSQR to return
 *>          arrays A and T. MB > N.
 *>          (Note that if MB > M, then M is used instead of MB
 *>          as the row block size).
 *> \endverbatim
 *>
 *> \param[in] NB
 *> \verbatim
 *>          NB is INTEGER
 *>          The column block size used by DLATSQR to return
 *>          arrays A and T. NB >= 1.
 *>          (Note that if NB > N, then N is used instead of NB
 *>          as the column block size).
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>
 *>          On entry:
 *>
 *>             The elements on and above the diagonal are not accessed.
 *>             The elements below the diagonal represent the unit
 *>             lower-trapezoidal blocked matrix V computed by DLATSQR
 *>             that defines the input matrices Q_in(k) (ones on the
 *>             diagonal are not stored) (same format as the output A
 *>             below the diagonal in DLATSQR).
 *>
 *>          On exit:
 *>
 *>             The array A contains an M-by-N orthonormal matrix Q_out,
 *>             i.e the columns of A are orthogonal unit vectors.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[in] T
 *> \verbatim
 *>          T is DOUBLE PRECISION array,
 *>          dimension (LDT, N * NIRB)
 *>          where NIRB = Number_of_input_row_blocks
 *>                     = MAX( 1, CEIL((M-N)/(MB-N)) )
 *>          Let NICB = Number_of_input_col_blocks
 *>                   = CEIL(N/NB)
 *>
 *>          The upper-triangular block reflectors used to define the
 *>          input matrices Q_in(k), k=(1:NIRB*NICB). The block
 *>          reflectors are stored in compact form in NIRB block
 *>          reflector sequences. Each of NIRB block reflector sequences
 *>          is stored in a larger NB-by-N column block of T and consists
 *>          of NICB smaller NB-by-NB upper-triangular column blocks.
 *>          (same format as the output T in DLATSQR).
 *> \endverbatim
 *>
 *> \param[in] LDT
 *> \verbatim
 *>          LDT is INTEGER
 *>          The leading dimension of the array T.
 *>          LDT >= max(1,min(NB1,N)).
 *> \endverbatim
 *>
 *> \param[out] WORK
 *> \verbatim
 *>          (workspace) DOUBLE PRECISION array, dimension (MAX(2,LWORK))
 *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
 *> \endverbatim
 *>
 *> \param[in] LWORK
 *> \verbatim
 *>          The dimension of the array WORK.  LWORK >= (M+NB)*N.
 *>          If LWORK = -1, then a workspace query is assumed.
 *>          The routine only calculates the optimal size of the WORK
 *>          array, returns this value as the first entry of the WORK
 *>          array, and no error message related to LWORK is issued
 *>          by XERBLA.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *> \endverbatim
 *>
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \date November 2019
 *
 *> \ingroup doubleOTHERcomputational
 *
 *> \par Contributors:
 *  ==================
 *>
 *> \verbatim
 *>
 *> November 2019, Igor Kozachenko,
 *>                Computer Science Division,
 *>                University of California, Berkeley
 *>
 *> \endverbatim
 *
 *  =====================================================================
      SUBROUTINE DORGTSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK,
     $                     INFO )
      IMPLICIT NONE
 *
 *  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     November 2019
 *
 *     .. Scalar Arguments ..
      INTEGER           INFO, LDA, LDT, LWORK, M, N, MB, NB
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION  A( LDA, * ), T( LDT, * ), WORK( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *     .. Local Scalars ..
      LOGICAL            LQUERY
      INTEGER            IINFO, LDC, LWORKOPT, LC, LW, NBLOCAL, J
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DCOPY, DLAMTSQR, DLASET, XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          DBLE, MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters
 *
      LQUERY  = LWORK.EQ.-1
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 .OR. M.LT.N ) THEN
         INFO = -2
      ELSE IF( MB.LE.N ) THEN
         INFO = -3
      ELSE IF( NB.LT.1 ) THEN
         INFO = -4
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -6
      ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN
         INFO = -8
      ELSE
 *
 *        Test the input LWORK for the dimension of the array WORK.
 *        This workspace is used to store array C(LDC, N) and WORK(LWORK)
 *        in the call to DLAMTSQR. See the documentation for DLAMTSQR.
 *
         IF( LWORK.LT.2 .AND. (.NOT.LQUERY) ) THEN
            INFO = -10
         ELSE
 *
 *           Set block size for column blocks
 *
            NBLOCAL = MIN( NB, N )
 *
 *           LWORK = -1, then set the size for the array C(LDC,N)
 *           in DLAMTSQR call and set the optimal size of the work array
 *           WORK(LWORK) in DLAMTSQR call.
 *
            LDC = M
            LC = LDC*N
            LW = N * NBLOCAL
 *
            LWORKOPT = LC+LW
 *
            IF( ( LWORK.LT.MAX( 1, LWORKOPT ) ).AND.(.NOT.LQUERY) ) THEN
               INFO = -10
            END IF
         END IF
 *
      END IF
 *
 *     Handle error in the input parameters and return workspace query.
 *
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DORGTSQR', -INFO )
         RETURN
      ELSE IF ( LQUERY ) THEN
         WORK( 1 ) = DBLE( LWORKOPT )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( MIN( M, N ).EQ.0 ) THEN
         WORK( 1 ) = DBLE( LWORKOPT )
         RETURN
      END IF
 *
 *     (1) Form explicitly the tall-skinny M-by-N left submatrix Q1_in
 *     of M-by-M orthogonal matrix Q_in, which is implicitly stored in
 *     the subdiagonal part of input array A and in the input array T.
 *     Perform by the following operation using the routine DLAMTSQR.
 *
 *         Q1_in = Q_in * ( I ), where I is a N-by-N identity matrix,
 *                        ( 0 )        0 is a (M-N)-by-N zero matrix.
 *
 *     (1a) Form M-by-N matrix in the array WORK(1:LDC*N) with ones
 *     on the diagonal and zeros elsewhere.
 *
      CALL DLASET( 'F', M, N, ZERO, ONE, WORK, LDC )
 *
 *     (1b)  On input, WORK(1:LDC*N) stores ( I );
 *                                          ( 0 )
 *
 *           On output, WORK(1:LDC*N) stores Q1_in.
 *
      CALL DLAMTSQR( 'L', 'N', M, N, N, MB, NBLOCAL, A, LDA, T, LDT,
     $               WORK, LDC, WORK( LC+1 ), LW, IINFO )
 *
 *     (2) Copy the result from the part of the work array (1:M,1:N)
 *     with the leading dimension LDC that starts at WORK(1) into
 *     the output array A(1:M,1:N) column-by-column.
 *
      DO J = 1, N
         CALL DCOPY( M, WORK( (J-1)*LDC + 1 ), 1, A( 1, J ), 1 )
      END DO
 *
      WORK( 1 ) = DBLE( LWORKOPT )
      RETURN
 *
 *     End of DORGTSQR
 *
      END
--- a/lapack-netlib/SRC/dorhr_col.f
+++ b/lapack-netlib/SRC/dorhr_col.f
@@ -0,0 +1,440 @@
 *> \brief \b DORHR_COL
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DORHR_COL + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorhr_col.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorhr_col.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorhr_col.f">
 *> [TXT]</a>
 *>
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DORHR_COL( M, N, NB, A, LDA, T, LDT, D, INFO )
 *
 *       .. Scalar Arguments ..
 *       INTEGER           INFO, LDA, LDT, M, N, NB
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION  A( LDA, * ), D( * ), T( LDT, * )
 *       ..
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *>  DORHR_COL takes an M-by-N real matrix Q_in with orthonormal columns
 *>  as input, stored in A, and performs Householder Reconstruction (HR),
 *>  i.e. reconstructs Householder vectors V(i) implicitly representing
 *>  another M-by-N matrix Q_out, with the property that Q_in = Q_out*S,
 *>  where S is an N-by-N diagonal matrix with diagonal entries
 *>  equal to +1 or -1. The Householder vectors (columns V(i) of V) are
 *>  stored in A on output, and the diagonal entries of S are stored in D.
 *>  Block reflectors are also returned in T
 *>  (same output format as DGEQRT).
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] M
 *> \verbatim
 *>          M is INTEGER
 *>          The number of rows of the matrix A. M >= 0.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The number of columns of the matrix A. M >= N >= 0.
 *> \endverbatim
 *>
 *> \param[in] NB
 *> \verbatim
 *>          NB is INTEGER
 *>          The column block size to be used in the reconstruction
 *>          of Householder column vector blocks in the array A and
 *>          corresponding block reflectors in the array T. NB >= 1.
 *>          (Note that if NB > N, then N is used instead of NB
 *>          as the column block size.)
 *> \endverbatim
 *>
 *> \param[in,out] A
 *> \verbatim
 *>          A is DOUBLE PRECISION array, dimension (LDA,N)
 *>
 *>          On entry:
 *>
 *>             The array A contains an M-by-N orthonormal matrix Q_in,
 *>             i.e the columns of A are orthogonal unit vectors.
 *>
 *>          On exit:
 *>
 *>             The elements below the diagonal of A represent the unit
 *>             lower-trapezoidal matrix V of Householder column vectors
 *>             V(i). The unit diagonal entries of V are not stored
 *>             (same format as the output below the diagonal in A from
 *>             DGEQRT). The matrix T and the matrix V stored on output
 *>             in A implicitly define Q_out.
 *>
 *>             The elements above the diagonal contain the factor U
 *>             of the "modified" LU-decomposition:
 *>                Q_in - ( S ) = V * U
 *>                       ( 0 )
 *>             where 0 is a (M-N)-by-(M-N) zero matrix.
 *> \endverbatim
 *>
 *> \param[in] LDA
 *> \verbatim
 *>          LDA is INTEGER
 *>          The leading dimension of the array A.  LDA >= max(1,M).
 *> \endverbatim
 *>
 *> \param[out] T
 *> \verbatim
 *>          T is DOUBLE PRECISION array,
 *>          dimension (LDT, N)
 *>
 *>          Let NOCB = Number_of_output_col_blocks
 *>                   = CEIL(N/NB)
 *>
 *>          On exit, T(1:NB, 1:N) contains NOCB upper-triangular
 *>          block reflectors used to define Q_out stored in compact
 *>          form as a sequence of upper-triangular NB-by-NB column
 *>          blocks (same format as the output T in DGEQRT).
 *>          The matrix T and the matrix V stored on output in A
 *>          implicitly define Q_out. NOTE: The lower triangles
 *>          below the upper-triangular blcoks will be filled with
 *>          zeros. See Further Details.
 *> \endverbatim
 *>
 *> \param[in] LDT
 *> \verbatim
 *>          LDT is INTEGER
 *>          The leading dimension of the array T.
 *>          LDT >= max(1,min(NB,N)).
 *> \endverbatim
 *>
 *> \param[out] D
 *> \verbatim
 *>          D is DOUBLE PRECISION array, dimension min(M,N).
 *>          The elements can be only plus or minus one.
 *>
 *>          D(i) is constructed as D(i) = -SIGN(Q_in_i(i,i)), where
 *>          1 <= i <= min(M,N), and Q_in_i is Q_in after performing
 *>          i-1 steps of “modified” Gaussian elimination.
 *>          See Further Details.
 *> \endverbatim
 *>
 *> \param[out] INFO
 *> \verbatim
 *>          INFO is INTEGER
 *>          = 0:  successful exit
 *>          < 0:  if INFO = -i, the i-th argument had an illegal value
 *> \endverbatim
 *>
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *> The computed M-by-M orthogonal factor Q_out is defined implicitly as
 *> a product of orthogonal matrices Q_out(i). Each Q_out(i) is stored in
 *> the compact WY-representation format in the corresponding blocks of
 *> matrices V (stored in A) and T.
 *>
 *> The M-by-N unit lower-trapezoidal matrix V stored in the M-by-N
 *> matrix A contains the column vectors V(i) in NB-size column
 *> blocks VB(j). For example, VB(1) contains the columns
 *> V(1), V(2), ... V(NB). NOTE: The unit entries on
 *> the diagonal of Y are not stored in A.
 *>
 *> The number of column blocks is
 *>
 *>     NOCB = Number_of_output_col_blocks = CEIL(N/NB)
 *>
 *> where each block is of order NB except for the last block, which
 *> is of order LAST_NB = N - (NOCB-1)*NB.
 *>
 *> For example, if M=6,  N=5 and NB=2, the matrix V is
 *>
 *>
 *>     V = (    VB(1),   VB(2), VB(3) ) =
 *>
 *>       = (   1                      )
 *>         ( v21    1                 )
 *>         ( v31  v32    1            )
 *>         ( v41  v42  v43   1        )
 *>         ( v51  v52  v53  v54    1  )
 *>         ( v61  v62  v63  v54   v65 )
 *>
 *>
 *> For each of the column blocks VB(i), an upper-triangular block
 *> reflector TB(i) is computed. These blocks are stored as
 *> a sequence of upper-triangular column blocks in the NB-by-N
 *> matrix T. The size of each TB(i) block is NB-by-NB, except
 *> for the last block, whose size is LAST_NB-by-LAST_NB.
 *>
 *> For example, if M=6,  N=5 and NB=2, the matrix T is
 *>
 *>     T  = (    TB(1),    TB(2), TB(3) ) =
 *>
 *>        = ( t11  t12  t13  t14   t15  )
 *>          (      t22       t24        )
 *>
 *>
 *> The M-by-M factor Q_out is given as a product of NOCB
 *> orthogonal M-by-M matrices Q_out(i).
 *>
 *>     Q_out = Q_out(1) * Q_out(2) * ... * Q_out(NOCB),
 *>
 *> where each matrix Q_out(i) is given by the WY-representation
 *> using corresponding blocks from the matrices V and T:
 *>
 *>     Q_out(i) = I - VB(i) * TB(i) * (VB(i))**T,
 *>
 *> where I is the identity matrix. Here is the formula with matrix
 *> dimensions:
 *>
 *>  Q(i){M-by-M} = I{M-by-M} -
 *>    VB(i){M-by-INB} * TB(i){INB-by-INB} * (VB(i))**T {INB-by-M},
 *>
 *> where INB = NB, except for the last block NOCB
 *> for which INB=LAST_NB.
 *>
 *> =====
 *> NOTE:
 *> =====
 *>
 *> If Q_in is the result of doing a QR factorization
 *> B = Q_in * R_in, then:
 *>
 *> B = (Q_out*S) * R_in = Q_out * (S * R_in) = O_out * R_out.
 *>
 *> So if one wants to interpret Q_out as the result
 *> of the QR factorization of B, then corresponding R_out
 *> should be obtained by R_out = S * R_in, i.e. some rows of R_in
 *> should be multiplied by -1.
 *>
 *> For the details of the algorithm, see [1].
 *>
 *> [1] "Reconstructing Householder vectors from tall-skinny QR",
 *>     G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
 *>     E. Solomonik, J. Parallel Distrib. Comput.,
 *>     vol. 85, pp. 3-31, 2015.
 *> \endverbatim
 *>
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \date November 2019
 *
 *> \ingroup doubleOTHERcomputational
 *
 *> \par Contributors:
 *  ==================
 *>
 *> \verbatim
 *>
 *> November   2019, Igor Kozachenko,
 *>            Computer Science Division,
 *>            University of California, Berkeley
 *>
 *> \endverbatim
 *
 *  =====================================================================
      SUBROUTINE DORHR_COL( M, N, NB, A, LDA, T, LDT, D, INFO )
      IMPLICIT NONE
 *
 *  -- LAPACK computational routine (version 3.9.0) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     November 2019
 *
 *     .. Scalar Arguments ..
      INTEGER           INFO, LDA, LDT, M, N, NB
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION  A( LDA, * ), D( * ), T( LDT, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, IINFO, J, JB, JBTEMP1, JBTEMP2, JNB,
     $                   NPLUSONE
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DCOPY, DLAORHR_COL_GETRFNP, DSCAL, DTRSM,
     $                   XERBLA
 *     ..
 *     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters
 *
      INFO = 0
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
         INFO = -2
      ELSE IF( NB.LT.1 ) THEN
         INFO = -3
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -5
      ELSE IF( LDT.LT.MAX( 1, MIN( NB, N ) ) ) THEN
         INFO = -7
      END IF
 *
 *     Handle error in the input parameters.
 *
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'DORHR_COL', -INFO )
         RETURN
      END IF
 *
 *     Quick return if possible
 *
      IF( MIN( M, N ).EQ.0 ) THEN
         RETURN
      END IF
 *
 *     On input, the M-by-N matrix A contains the orthogonal
 *     M-by-N matrix Q_in.
 *
 *     (1) Compute the unit lower-trapezoidal V (ones on the diagonal
 *     are not stored) by performing the "modified" LU-decomposition.
 *
 *     Q_in - ( S ) = V * U = ( V1 ) * U,
 *            ( 0 )           ( V2 )
 *
 *     where 0 is an (M-N)-by-N zero matrix.
 *
 *     (1-1) Factor V1 and U.

      CALL DLAORHR_COL_GETRFNP( N, N, A, LDA, D, IINFO )
 *
 *     (1-2) Solve for V2.
 *
      IF( M.GT.N ) THEN
         CALL DTRSM( 'R', 'U', 'N', 'N', M-N, N, ONE, A, LDA,
     $               A( N+1, 1 ), LDA )
      END IF
 *
 *     (2) Reconstruct the block reflector T stored in T(1:NB, 1:N)
 *     as a sequence of upper-triangular blocks with NB-size column
 *     blocking.
 *
 *     Loop over the column blocks of size NB of the array A(1:M,1:N)
 *     and the array T(1:NB,1:N), JB is the column index of a column
 *     block, JNB is the column block size at each step JB.
 *
      NPLUSONE = N + 1
      DO JB = 1, N, NB
 *
 *        (2-0) Determine the column block size JNB.
 *
         JNB = MIN( NPLUSONE-JB, NB )
 *
 *        (2-1) Copy the upper-triangular part of the current JNB-by-JNB
 *        diagonal block U(JB) (of the N-by-N matrix U) stored
 *        in A(JB:JB+JNB-1,JB:JB+JNB-1) into the upper-triangular part
 *        of the current JNB-by-JNB block T(1:JNB,JB:JB+JNB-1)
 *        column-by-column, total JNB*(JNB+1)/2 elements.
 *
         JBTEMP1 = JB - 1
         DO J = JB, JB+JNB-1
            CALL DCOPY( J-JBTEMP1, A( JB, J ), 1, T( 1, J ), 1 )
         END DO
 *
 *        (2-2) Perform on the upper-triangular part of the current
 *        JNB-by-JNB diagonal block U(JB) (of the N-by-N matrix U) stored
 *        in T(1:JNB,JB:JB+JNB-1) the following operation in place:
 *        (-1)*U(JB)*S(JB), i.e the result will be stored in the upper-
 *        triangular part of T(1:JNB,JB:JB+JNB-1). This multiplication
 *        of the JNB-by-JNB diagonal block U(JB) by the JNB-by-JNB
 *        diagonal block S(JB) of the N-by-N sign matrix S from the
 *        right means changing the sign of each J-th column of the block
 *        U(JB) according to the sign of the diagonal element of the block
 *        S(JB), i.e. S(J,J) that is stored in the array element D(J).
 *
         DO J = JB, JB+JNB-1
            IF( D( J ).EQ.ONE ) THEN
               CALL DSCAL( J-JBTEMP1, -ONE, T( 1, J ), 1 )
            END IF
         END DO
 *
 *        (2-3) Perform the triangular solve for the current block
 *        matrix X(JB):
 *
 *               X(JB) * (A(JB)**T) = B(JB), where:
 *
 *               A(JB)**T  is a JNB-by-JNB unit upper-triangular
 *                         coefficient block, and A(JB)=V1(JB), which
 *                         is a JNB-by-JNB unit lower-triangular block
 *                         stored in A(JB:JB+JNB-1,JB:JB+JNB-1).
 *                         The N-by-N matrix V1 is the upper part
 *                         of the M-by-N lower-trapezoidal matrix V
 *                         stored in A(1:M,1:N);
 *
 *               B(JB)     is a JNB-by-JNB  upper-triangular right-hand
 *                         side block, B(JB) = (-1)*U(JB)*S(JB), and
 *                         B(JB) is stored in T(1:JNB,JB:JB+JNB-1);
 *
 *               X(JB)     is a JNB-by-JNB upper-triangular solution
 *                         block, X(JB) is the upper-triangular block
 *                         reflector T(JB), and X(JB) is stored
 *                         in T(1:JNB,JB:JB+JNB-1).
 *
 *             In other words, we perform the triangular solve for the
 *             upper-triangular block T(JB):
 *
 *               T(JB) * (V1(JB)**T) = (-1)*U(JB)*S(JB).
 *
 *             Even though the blocks X(JB) and B(JB) are upper-
 *             triangular, the routine DTRSM will access all JNB**2
 *             elements of the square T(1:JNB,JB:JB+JNB-1). Therefore,
 *             we need to set to zero the elements of the block
 *             T(1:JNB,JB:JB+JNB-1) below the diagonal before the call
 *             to DTRSM.
 *
 *        (2-3a) Set the elements to zero.
 *
         JBTEMP2 = JB - 2
         DO J = JB, JB+JNB-2
            DO I = J-JBTEMP2, NB
               T( I, J ) = ZERO
            END DO
         END DO
 *
 *        (2-3b) Perform the triangular solve.
 *
         CALL DTRSM( 'R', 'L', 'T', 'U', JNB, JNB, ONE,
     $               A( JB, JB ), LDA, T( 1, JB ), LDT )
 *
      END DO
 *
      RETURN
 *
 *     End of DORHR_COL
 *
      END