Merge pull request #5064 from martin-frbg/lapack1080

Replace LAPACK ?LARFT with a recursive implementation (Reference-LAPACK PR 1080)
1 year ago · f422845b6d
--- a/lapack-netlib/SRC/VARIANTS/Makefile
+++ b/lapack-netlib/SRC/VARIANTS/Makefile
@@ -30,9 +30,11 @@ LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o

 QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o

 LARFTL2 = larft/LL-LVL2/clarft.o larft/LL-LVL2/dlarft.o larft/LL-LVL2/slarft.o larft/LL-LVL2/zlarft.o


 .PHONY: all
 all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a
 all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a larftl2.a

 cholrl.a: $(CHOLRL)
 	$(AR) $(ARFLAGS) $@ $^
@@ -58,9 +60,13 @@ qrll.a: $(QRLL)
 	$(AR) $(ARFLAGS) $@ $^
 	$(RANLIB) $@

 larftl2.a: $(LARFTL2)
 	$(AR) $(ARFLAGS) $@ $^
 	$(RANLIB) $@

 .PHONY: clean cleanobj cleanlib
 clean: cleanobj cleanlib
 cleanobj:
 	rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL)
 	rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) $(LARFTL2)
 cleanlib:
 	rm -f *.a
--- a/lapack-netlib/SRC/VARIANTS/README
+++ b/lapack-netlib/SRC/VARIANTS/README
@@ -23,6 +23,7 @@ This directory contains several variants of LAPACK routines in single/double/com
 	- [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL
 	- [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL
 	- [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP
 	- [sdcz]larft using a Left Looking Level 2 BLAS version algorithm - Directory: SRC/VARIANTS/larft/LL-LVL2

 References:For a more detailed description please refer to
 	- [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
@@ -44,6 +45,7 @@ Corresponding libraries created in SRC/VARIANTS:
 	- QR Left Looking : qrll.a
 	- Cholesky Right Looking : cholrl.a
 	- Cholesky Top : choltop.a
 	- LARFT Level 2: larftl2.a


 ===========
--- a/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/clarft.f
+++ b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/clarft.f
@@ -0,0 +1,328 @@
 *> \brief \b CLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download CLARFT + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clarft.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clarft.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clarft.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, STOREV
 *       INTEGER            K, LDT, LDV, N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX            T( LDT, * ), TAU( * ), V( LDV, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> CLARFT forms the triangular factor T of a complex block reflector H
 *> of order n, which is defined as a product of k elementary reflectors.
 *>
 *> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
 *>
 *> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
 *>
 *> If STOREV = 'C', the vector which defines the elementary reflector
 *> H(i) is stored in the i-th column of the array V, and
 *>
 *>    H  =  I - V * T * V**H
 *>
 *> If STOREV = 'R', the vector which defines the elementary reflector
 *> H(i) is stored in the i-th row of the array V, and
 *>
 *>    H  =  I - V**H * T * V
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] DIRECT
 *> \verbatim
 *>          DIRECT is CHARACTER*1
 *>          Specifies the order in which the elementary reflectors are
 *>          multiplied to form the block reflector:
 *>          = 'F': H = H(1) H(2) . . . H(k) (Forward)
 *>          = 'B': H = H(k) . . . H(2) H(1) (Backward)
 *> \endverbatim
 *>
 *> \param[in] STOREV
 *> \verbatim
 *>          STOREV is CHARACTER*1
 *>          Specifies how the vectors which define the elementary
 *>          reflectors are stored (see also Further Details):
 *>          = 'C': columnwise
 *>          = 'R': rowwise
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the block reflector H. N >= 0.
 *> \endverbatim
 *>
 *> \param[in] K
 *> \verbatim
 *>          K is INTEGER
 *>          The order of the triangular factor T (= the number of
 *>          elementary reflectors). K >= 1.
 *> \endverbatim
 *>
 *> \param[in] V
 *> \verbatim
 *>          V is COMPLEX array, dimension
 *>                               (LDV,K) if STOREV = 'C'
 *>                               (LDV,N) if STOREV = 'R'
 *>          The matrix V. See further details.
 *> \endverbatim
 *>
 *> \param[in] LDV
 *> \verbatim
 *>          LDV is INTEGER
 *>          The leading dimension of the array V.
 *>          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
 *> \endverbatim
 *>
 *> \param[in] TAU
 *> \verbatim
 *>          TAU is COMPLEX array, dimension (K)
 *>          TAU(i) must contain the scalar factor of the elementary
 *>          reflector H(i).
 *> \endverbatim
 *>
 *> \param[out] T
 *> \verbatim
 *>          T is COMPLEX array, dimension (LDT,K)
 *>          The k by k triangular factor T of the block reflector.
 *>          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
 *>          lower triangular. The rest of the array is not used.
 *> \endverbatim
 *>
 *> \param[in] LDT
 *> \verbatim
 *>          LDT is INTEGER
 *>          The leading dimension of the array T. LDT >= K.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup larft
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The shape of the matrix V and the storage of the vectors which define
 *>  the H(i) is best illustrated by the following example with n = 5 and
 *>  k = 3. The elements equal to 1 are not stored.
 *>
 *>  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
 *>
 *>               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
 *>                   ( v1  1    )                     (     1 v2 v2 v2 )
 *>                   ( v1 v2  1 )                     (        1 v3 v3 )
 *>                   ( v1 v2 v3 )
 *>                   ( v1 v2 v3 )
 *>
 *>  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
 *>
 *>               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
 *>                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
 *>                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
 *>                   (     1 v3 )
 *>                   (        1 )
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      CHARACTER          DIRECT, STOREV
      INTEGER            K, LDT, LDV, N
 *     ..
 *     .. Array Arguments ..
      COMPLEX            T( LDT, * ), TAU( * ), V( LDV, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      COMPLEX            ONE, ZERO
      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, J, PREVLASTV, LASTV
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           CGEMM, CGEMV, CTRMV
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
 *     ..
 *     .. Executable Statements ..
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
      IF( LSAME( DIRECT, 'F' ) ) THEN
         PREVLASTV = N
         DO I = 1, K
            PREVLASTV = MAX( PREVLASTV, I )
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = 1, I
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( LSAME( STOREV, 'C' ) ) THEN
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( LASTV, I ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
 *
                  CALL CGEMV( 'Conjugate transpose', J-I, I-1,
     $                        -TAU( I ), V( I+1, 1 ), LDV,
     $                        V( I+1, I ), 1,
     $                        ONE, T( 1, I ), 1 )
               ELSE
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( I, LASTV ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( J , I )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
 *
                  CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
     $                        ONE, T( 1, I ), LDT )
               END IF
 *
 *              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
 *
               CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
     $                     T,
     $                     LDT, T( 1, I ), 1 )
               T( I, I ) = TAU( I )
               IF( I.GT.1 ) THEN
                  PREVLASTV = MAX( PREVLASTV, LASTV )
               ELSE
                  PREVLASTV = LASTV
               END IF
            END IF
         END DO
      ELSE
         PREVLASTV = 1
         DO I = K, 1, -1
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = I, K
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( I.LT.K ) THEN
                  IF( LSAME( STOREV, 'C' ) ) THEN
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( LASTV, I ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
 *
                     CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I,
     $                           -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
     $                           1, ONE, T( I+1, I ), 1 )
                  ELSE
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( I, LASTV ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
 *
                     CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J,
     $                           -TAU( I ),
     $                           V( I+1, J ), LDV, V( I, J ), LDV,
     $                           ONE, T( I+1, I ), LDT )
                  END IF
 *
 *                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
 *
                  CALL CTRMV( 'Lower', 'No transpose', 'Non-unit',
     $                        K-I,
     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
                  IF( I.GT.1 ) THEN
                     PREVLASTV = MIN( PREVLASTV, LASTV )
                  ELSE
                     PREVLASTV = LASTV
                  END IF
               END IF
               T( I, I ) = TAU( I )
            END IF
         END DO
      END IF
      RETURN
 *
 *     End of CLARFT
 *
      END
--- a/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/dlarft.f
+++ b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/dlarft.f
@@ -0,0 +1,326 @@
 *> \brief \b DLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLARFT + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarft.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarft.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarft.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, STOREV
 *       INTEGER            K, LDT, LDV, N
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   T( LDT, * ), TAU( * ), V( LDV, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> DLARFT forms the triangular factor T of a real block reflector H
 *> of order n, which is defined as a product of k elementary reflectors.
 *>
 *> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
 *>
 *> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
 *>
 *> If STOREV = 'C', the vector which defines the elementary reflector
 *> H(i) is stored in the i-th column of the array V, and
 *>
 *>    H  =  I - V * T * V**T
 *>
 *> If STOREV = 'R', the vector which defines the elementary reflector
 *> H(i) is stored in the i-th row of the array V, and
 *>
 *>    H  =  I - V**T * T * V
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] DIRECT
 *> \verbatim
 *>          DIRECT is CHARACTER*1
 *>          Specifies the order in which the elementary reflectors are
 *>          multiplied to form the block reflector:
 *>          = 'F': H = H(1) H(2) . . . H(k) (Forward)
 *>          = 'B': H = H(k) . . . H(2) H(1) (Backward)
 *> \endverbatim
 *>
 *> \param[in] STOREV
 *> \verbatim
 *>          STOREV is CHARACTER*1
 *>          Specifies how the vectors which define the elementary
 *>          reflectors are stored (see also Further Details):
 *>          = 'C': columnwise
 *>          = 'R': rowwise
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the block reflector H. N >= 0.
 *> \endverbatim
 *>
 *> \param[in] K
 *> \verbatim
 *>          K is INTEGER
 *>          The order of the triangular factor T (= the number of
 *>          elementary reflectors). K >= 1.
 *> \endverbatim
 *>
 *> \param[in] V
 *> \verbatim
 *>          V is DOUBLE PRECISION array, dimension
 *>                               (LDV,K) if STOREV = 'C'
 *>                               (LDV,N) if STOREV = 'R'
 *>          The matrix V. See further details.
 *> \endverbatim
 *>
 *> \param[in] LDV
 *> \verbatim
 *>          LDV is INTEGER
 *>          The leading dimension of the array V.
 *>          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
 *> \endverbatim
 *>
 *> \param[in] TAU
 *> \verbatim
 *>          TAU is DOUBLE PRECISION array, dimension (K)
 *>          TAU(i) must contain the scalar factor of the elementary
 *>          reflector H(i).
 *> \endverbatim
 *>
 *> \param[out] T
 *> \verbatim
 *>          T is DOUBLE PRECISION array, dimension (LDT,K)
 *>          The k by k triangular factor T of the block reflector.
 *>          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
 *>          lower triangular. The rest of the array is not used.
 *> \endverbatim
 *>
 *> \param[in] LDT
 *> \verbatim
 *>          LDT is INTEGER
 *>          The leading dimension of the array T. LDT >= K.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup larft
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The shape of the matrix V and the storage of the vectors which define
 *>  the H(i) is best illustrated by the following example with n = 5 and
 *>  k = 3. The elements equal to 1 are not stored.
 *>
 *>  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
 *>
 *>               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
 *>                   ( v1  1    )                     (     1 v2 v2 v2 )
 *>                   ( v1 v2  1 )                     (        1 v3 v3 )
 *>                   ( v1 v2 v3 )
 *>                   ( v1 v2 v3 )
 *>
 *>  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
 *>
 *>               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
 *>                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
 *>                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
 *>                   (     1 v3 )
 *>                   (        1 )
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      CHARACTER          DIRECT, STOREV
      INTEGER            K, LDT, LDV, N
 *     ..
 *     .. Array Arguments ..
      DOUBLE PRECISION   T( LDT, * ), TAU( * ), V( LDV, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, J, PREVLASTV, LASTV
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           DGEMV, DTRMV
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
 *     ..
 *     .. Executable Statements ..
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
      IF( LSAME( DIRECT, 'F' ) ) THEN
         PREVLASTV = N
         DO I = 1, K
            PREVLASTV = MAX( I, PREVLASTV )
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = 1, I
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( LSAME( STOREV, 'C' ) ) THEN
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( LASTV, I ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( I , J )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i)
 *
                  CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ),
     $                        V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE,
     $                        T( 1, I ), 1 )
               ELSE
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( I, LASTV ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( J , I )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T
 *
                  CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ),
     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE,
     $                        T( 1, I ), 1 )
               END IF
 *
 *              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
 *
               CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
     $                     T,
     $                     LDT, T( 1, I ), 1 )
               T( I, I ) = TAU( I )
               IF( I.GT.1 ) THEN
                  PREVLASTV = MAX( PREVLASTV, LASTV )
               ELSE
                  PREVLASTV = LASTV
               END IF
            END IF
         END DO
      ELSE
         PREVLASTV = 1
         DO I = K, 1, -1
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = I, K
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( I.LT.K ) THEN
                  IF( LSAME( STOREV, 'C' ) ) THEN
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( LASTV, I ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( N-K+I , J )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i)
 *
                     CALL DGEMV( 'Transpose', N-K+I-J, K-I,
     $                           -TAU( I ),
     $                           V( J, I+1 ), LDV, V( J, I ), 1, ONE,
     $                           T( I+1, I ), 1 )
                  ELSE
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( I, LASTV ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T
 *
                     CALL DGEMV( 'No transpose', K-I, N-K+I-J,
     $                    -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV,
     $                    ONE, T( I+1, I ), 1 )
                  END IF
 *
 *                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
 *
                  CALL DTRMV( 'Lower', 'No transpose', 'Non-unit',
     $                        K-I,
     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
                  IF( I.GT.1 ) THEN
                     PREVLASTV = MIN( PREVLASTV, LASTV )
                  ELSE
                     PREVLASTV = LASTV
                  END IF
               END IF
               T( I, I ) = TAU( I )
            END IF
         END DO
      END IF
      RETURN
 *
 *     End of DLARFT
 *
      END
--- a/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/slarft.f
+++ b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/slarft.f
@@ -0,0 +1,326 @@
 *> \brief \b SLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download SLARFT + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slarft.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slarft.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slarft.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, STOREV
 *       INTEGER            K, LDT, LDV, N
 *       ..
 *       .. Array Arguments ..
 *       REAL               T( LDT, * ), TAU( * ), V( LDV, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> SLARFT forms the triangular factor T of a real block reflector H
 *> of order n, which is defined as a product of k elementary reflectors.
 *>
 *> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
 *>
 *> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
 *>
 *> If STOREV = 'C', the vector which defines the elementary reflector
 *> H(i) is stored in the i-th column of the array V, and
 *>
 *>    H  =  I - V * T * V**T
 *>
 *> If STOREV = 'R', the vector which defines the elementary reflector
 *> H(i) is stored in the i-th row of the array V, and
 *>
 *>    H  =  I - V**T * T * V
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] DIRECT
 *> \verbatim
 *>          DIRECT is CHARACTER*1
 *>          Specifies the order in which the elementary reflectors are
 *>          multiplied to form the block reflector:
 *>          = 'F': H = H(1) H(2) . . . H(k) (Forward)
 *>          = 'B': H = H(k) . . . H(2) H(1) (Backward)
 *> \endverbatim
 *>
 *> \param[in] STOREV
 *> \verbatim
 *>          STOREV is CHARACTER*1
 *>          Specifies how the vectors which define the elementary
 *>          reflectors are stored (see also Further Details):
 *>          = 'C': columnwise
 *>          = 'R': rowwise
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the block reflector H. N >= 0.
 *> \endverbatim
 *>
 *> \param[in] K
 *> \verbatim
 *>          K is INTEGER
 *>          The order of the triangular factor T (= the number of
 *>          elementary reflectors). K >= 1.
 *> \endverbatim
 *>
 *> \param[in] V
 *> \verbatim
 *>          V is REAL array, dimension
 *>                               (LDV,K) if STOREV = 'C'
 *>                               (LDV,N) if STOREV = 'R'
 *>          The matrix V. See further details.
 *> \endverbatim
 *>
 *> \param[in] LDV
 *> \verbatim
 *>          LDV is INTEGER
 *>          The leading dimension of the array V.
 *>          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
 *> \endverbatim
 *>
 *> \param[in] TAU
 *> \verbatim
 *>          TAU is REAL array, dimension (K)
 *>          TAU(i) must contain the scalar factor of the elementary
 *>          reflector H(i).
 *> \endverbatim
 *>
 *> \param[out] T
 *> \verbatim
 *>          T is REAL array, dimension (LDT,K)
 *>          The k by k triangular factor T of the block reflector.
 *>          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
 *>          lower triangular. The rest of the array is not used.
 *> \endverbatim
 *>
 *> \param[in] LDT
 *> \verbatim
 *>          LDT is INTEGER
 *>          The leading dimension of the array T. LDT >= K.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup larft
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The shape of the matrix V and the storage of the vectors which define
 *>  the H(i) is best illustrated by the following example with n = 5 and
 *>  k = 3. The elements equal to 1 are not stored.
 *>
 *>  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
 *>
 *>               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
 *>                   ( v1  1    )                     (     1 v2 v2 v2 )
 *>                   ( v1 v2  1 )                     (        1 v3 v3 )
 *>                   ( v1 v2 v3 )
 *>                   ( v1 v2 v3 )
 *>
 *>  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
 *>
 *>               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
 *>                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
 *>                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
 *>                   (     1 v3 )
 *>                   (        1 )
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      CHARACTER          DIRECT, STOREV
      INTEGER            K, LDT, LDV, N
 *     ..
 *     .. Array Arguments ..
      REAL               T( LDT, * ), TAU( * ), V( LDV, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      REAL               ONE, ZERO
      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, J, PREVLASTV, LASTV
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           SGEMV, STRMV
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
 *     ..
 *     .. Executable Statements ..
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
      IF( LSAME( DIRECT, 'F' ) ) THEN
         PREVLASTV = N
         DO I = 1, K
            PREVLASTV = MAX( I, PREVLASTV )
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = 1, I
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( LSAME( STOREV, 'C' ) ) THEN
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( LASTV, I ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( I , J )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i)
 *
                  CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ),
     $                        V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE,
     $                        T( 1, I ), 1 )
               ELSE
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( I, LASTV ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( J , I )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T
 *
                  CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ),
     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
     $                        ONE, T( 1, I ), 1 )
               END IF
 *
 *              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
 *
               CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
     $                     T,
     $                     LDT, T( 1, I ), 1 )
               T( I, I ) = TAU( I )
               IF( I.GT.1 ) THEN
                  PREVLASTV = MAX( PREVLASTV, LASTV )
               ELSE
                  PREVLASTV = LASTV
               END IF
            END IF
         END DO
      ELSE
         PREVLASTV = 1
         DO I = K, 1, -1
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = I, K
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( I.LT.K ) THEN
                  IF( LSAME( STOREV, 'C' ) ) THEN
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( LASTV, I ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( N-K+I , J )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i)
 *
                     CALL SGEMV( 'Transpose', N-K+I-J, K-I,
     $                           -TAU( I ),
     $                           V( J, I+1 ), LDV, V( J, I ), 1, ONE,
     $                           T( I+1, I ), 1 )
                  ELSE
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( I, LASTV ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T
 *
                     CALL SGEMV( 'No transpose', K-I, N-K+I-J,
     $                    -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV,
     $                    ONE, T( I+1, I ), 1 )
                  END IF
 *
 *                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
 *
                  CALL STRMV( 'Lower', 'No transpose', 'Non-unit',
     $                        K-I,
     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
                  IF( I.GT.1 ) THEN
                     PREVLASTV = MIN( PREVLASTV, LASTV )
                  ELSE
                     PREVLASTV = LASTV
                  END IF
               END IF
               T( I, I ) = TAU( I )
            END IF
         END DO
      END IF
      RETURN
 *
 *     End of SLARFT
 *
      END
--- a/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/zlarft.f
+++ b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/zlarft.f
@@ -0,0 +1,327 @@
 *> \brief \b ZLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download ZLARFT + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarft.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarft.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarft.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, STOREV
 *       INTEGER            K, LDT, LDV, N
 *       ..
 *       .. Array Arguments ..
 *       COMPLEX*16         T( LDT, * ), TAU( * ), V( LDV, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *> ZLARFT forms the triangular factor T of a complex block reflector H
 *> of order n, which is defined as a product of k elementary reflectors.
 *>
 *> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
 *>
 *> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
 *>
 *> If STOREV = 'C', the vector which defines the elementary reflector
 *> H(i) is stored in the i-th column of the array V, and
 *>
 *>    H  =  I - V * T * V**H
 *>
 *> If STOREV = 'R', the vector which defines the elementary reflector
 *> H(i) is stored in the i-th row of the array V, and
 *>
 *>    H  =  I - V**H * T * V
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] DIRECT
 *> \verbatim
 *>          DIRECT is CHARACTER*1
 *>          Specifies the order in which the elementary reflectors are
 *>          multiplied to form the block reflector:
 *>          = 'F': H = H(1) H(2) . . . H(k) (Forward)
 *>          = 'B': H = H(k) . . . H(2) H(1) (Backward)
 *> \endverbatim
 *>
 *> \param[in] STOREV
 *> \verbatim
 *>          STOREV is CHARACTER*1
 *>          Specifies how the vectors which define the elementary
 *>          reflectors are stored (see also Further Details):
 *>          = 'C': columnwise
 *>          = 'R': rowwise
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>          The order of the block reflector H. N >= 0.
 *> \endverbatim
 *>
 *> \param[in] K
 *> \verbatim
 *>          K is INTEGER
 *>          The order of the triangular factor T (= the number of
 *>          elementary reflectors). K >= 1.
 *> \endverbatim
 *>
 *> \param[in] V
 *> \verbatim
 *>          V is COMPLEX*16 array, dimension
 *>                               (LDV,K) if STOREV = 'C'
 *>                               (LDV,N) if STOREV = 'R'
 *>          The matrix V. See further details.
 *> \endverbatim
 *>
 *> \param[in] LDV
 *> \verbatim
 *>          LDV is INTEGER
 *>          The leading dimension of the array V.
 *>          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
 *> \endverbatim
 *>
 *> \param[in] TAU
 *> \verbatim
 *>          TAU is COMPLEX*16 array, dimension (K)
 *>          TAU(i) must contain the scalar factor of the elementary
 *>          reflector H(i).
 *> \endverbatim
 *>
 *> \param[out] T
 *> \verbatim
 *>          T is COMPLEX*16 array, dimension (LDT,K)
 *>          The k by k triangular factor T of the block reflector.
 *>          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
 *>          lower triangular. The rest of the array is not used.
 *> \endverbatim
 *>
 *> \param[in] LDT
 *> \verbatim
 *>          LDT is INTEGER
 *>          The leading dimension of the array T. LDT >= K.
 *> \endverbatim
 *
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup larft
 *
 *> \par Further Details:
 *  =====================
 *>
 *> \verbatim
 *>
 *>  The shape of the matrix V and the storage of the vectors which define
 *>  the H(i) is best illustrated by the following example with n = 5 and
 *>  k = 3. The elements equal to 1 are not stored.
 *>
 *>  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
 *>
 *>               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
 *>                   ( v1  1    )                     (     1 v2 v2 v2 )
 *>                   ( v1 v2  1 )                     (        1 v3 v3 )
 *>                   ( v1 v2 v3 )
 *>                   ( v1 v2 v3 )
 *>
 *>  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
 *>
 *>               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
 *>                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
 *>                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
 *>                   (     1 v3 )
 *>                   (        1 )
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      CHARACTER          DIRECT, STOREV
      INTEGER            K, LDT, LDV, N
 *     ..
 *     .. Array Arguments ..
      COMPLEX*16         T( LDT, * ), TAU( * ), V( LDV, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      COMPLEX*16         ONE, ZERO
      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
 *     ..
 *     .. Local Scalars ..
      INTEGER            I, J, PREVLASTV, LASTV
 *     ..
 *     .. External Subroutines ..
      EXTERNAL           ZGEMV, ZTRMV, ZGEMM
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
 *     ..
 *     .. Executable Statements ..
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
      IF( LSAME( DIRECT, 'F' ) ) THEN
         PREVLASTV = N
         DO I = 1, K
            PREVLASTV = MAX( PREVLASTV, I )
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = 1, I
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( LSAME( STOREV, 'C' ) ) THEN
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( LASTV, I ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
 *
                  CALL ZGEMV( 'Conjugate transpose', J-I, I-1,
     $                        -TAU( I ), V( I+1, 1 ), LDV,
     $                        V( I+1, I ), 1, ONE, T( 1, I ), 1 )
               ELSE
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( I, LASTV ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( J , I )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
 *
                  CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
     $                        ONE, T( 1, I ), LDT )
               END IF
 *
 *              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
 *
               CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
     $                     T,
     $                     LDT, T( 1, I ), 1 )
               T( I, I ) = TAU( I )
               IF( I.GT.1 ) THEN
                  PREVLASTV = MAX( PREVLASTV, LASTV )
               ELSE
                  PREVLASTV = LASTV
               END IF
             END IF
         END DO
      ELSE
         PREVLASTV = 1
         DO I = K, 1, -1
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = I, K
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( I.LT.K ) THEN
                  IF( LSAME( STOREV, 'C' ) ) THEN
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( LASTV, I ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
 *
                     CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I,
     $                           -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
     $                           1, ONE, T( I+1, I ), 1 )
                  ELSE
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( I, LASTV ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
 *
                     CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J,
     $                           -TAU( I ),
     $                           V( I+1, J ), LDV, V( I, J ), LDV,
     $                           ONE, T( I+1, I ), LDT )
                  END IF
 *
 *                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
 *
                  CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit',
     $                        K-I,
     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
                  IF( I.GT.1 ) THEN
                     PREVLASTV = MIN( PREVLASTV, LASTV )
                  ELSE
                     PREVLASTV = LASTV
                  END IF
               END IF
               T( I, I ) = TAU( I )
            END IF
         END DO
      END IF
      RETURN
 *
 *     End of ZLARFT
 *
      END
--- a/lapack-netlib/SRC/clarft.f
+++ b/lapack-netlib/SRC/clarft.f
@@ -18,7 +18,7 @@
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *       RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, STOREV
@@ -130,7 +130,7 @@
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup complexOTHERauxiliary
 *> \ingroup larft
 *
 *> \par Further Details:
 *  =====================
@@ -159,167 +159,473 @@
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
      RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
     $                             TAU, T, LDT )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      CHARACTER          DIRECT, STOREV
      INTEGER            K, LDT, LDV, N
 *        .. Scalar Arguments
 *
      CHARACTER         DIRECT, STOREV
      INTEGER           K, LDT, LDV, N
 *     ..
 *     .. Array Arguments ..
      COMPLEX            T( LDT, * ), TAU( * ), V( LDV, * )
 *     ..
 *
 *  =====================================================================
      COMPLEX           T( LDT, * ), TAU( * ), V( LDV, * )
 *     ..
 *
 *     .. Parameters ..
      COMPLEX            ONE, ZERO
      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
 *     ..
 *
      COMPLEX           ONE, NEG_ONE, ZERO
      PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE=-1.0E+0)
 *
 *     .. Local Scalars ..
      INTEGER            I, J, PREVLASTV, LASTV
 *     ..
 *
      INTEGER           I,J,L
      LOGICAL           QR,LQ,QL,DIRF,COLV
 *
 *     .. External Subroutines ..
      EXTERNAL           CGEMM, CGEMV, CTRMV
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
 *
      EXTERNAL          CTRMM,CGEMM,CLACPY
 *
 *     .. External Functions..
 *
      LOGICAL           LSAME
      EXTERNAL          LSAME
 *
 *     .. Intrinsic Functions..
 *
      INTRINSIC         CONJG
 *     
 *     The general scheme used is inspired by the approach inside DGEQRT3
 *     which was (at the time of writing this code):
 *     Based on the algorithm of Elmroth and Gustavson,
 *     IBM J. Res. Develop. Vol 44 No. 4 July 2000.
 *     ..
 *     .. Executable Statements ..
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
      IF( LSAME( DIRECT, 'F' ) ) THEN
         PREVLASTV = N
         DO I = 1, K
            PREVLASTV = MAX( PREVLASTV, I )
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = 1, I
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( LSAME( STOREV, 'C' ) ) THEN
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( LASTV, I ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
 *
                  CALL CGEMV( 'Conjugate transpose', J-I, I-1,
     $                        -TAU( I ), V( I+1, 1 ), LDV,
     $                        V( I+1, I ), 1,
     $                        ONE, T( 1, I ), 1 )
               ELSE
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( I, LASTV ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( J , I )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
 *
                  CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
     $                        ONE, T( 1, I ), LDT )
               END IF
 *
 *              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
 *
               CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T,
     $                     LDT, T( 1, I ), 1 )
               T( I, I ) = TAU( I )
               IF( I.GT.1 ) THEN
                  PREVLASTV = MAX( PREVLASTV, LASTV )
               ELSE
                  PREVLASTV = LASTV
               END IF
            END IF
      IF(N.EQ.0.OR.K.EQ.0) THEN
         RETURN
      END IF
 *
 *     Base case
 *
      IF(N.EQ.1.OR.K.EQ.1) THEN
         T(1,1) = TAU(1)
         RETURN
      END IF
 *
 *     Beginning of executable statements
 *
      L = K / 2
 *
 *     Determine what kind of Q we need to compute
 *     We assume that if the user doesn't provide 'F' for DIRECT,
 *     then they meant to provide 'B' and if they don't provide
 *     'C' for STOREV, then they meant to provide 'R'
 *
      DIRF = LSAME(DIRECT,'F')
      COLV = LSAME(STOREV,'C')
 *
 *     QR happens when we have forward direction in column storage
 *
      QR = DIRF.AND.COLV
 *
 *     LQ happens when we have forward direction in row storage
 *
      LQ = DIRF.AND.(.NOT.COLV)
 *
 *     QL happens when we have backward direction in column storage
 *
      QL = (.NOT.DIRF).AND.COLV
 *
 *     The last case is RQ. Due to how we structured this, if the
 *     above 3 are false, then RQ must be true, so we never store 
 *     this
 *     RQ happens when we have backward direction in row storage
 *     RQ = (.NOT.DIRF).AND.(.NOT.COLV)
 *
      IF(QR) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |---------------|
 *            |V_{1,1} 0      |
 *            |V_{2,1} V_{2,2}|
 *            |V_{3,1} V_{3,2}|
 *            |---------------|
 *
 *        V_{1,1}\in\C^{l,l}      unit lower triangular
 *        V_{2,1}\in\C^{k-l,l}    rectangular
 *        V_{3,1}\in\C^{n-k,l}    rectangular
 *        
 *        V_{2,2}\in\C^{k-l,k-l}  unit lower triangular
 *        V_{3,2}\in\C^{n-k,k-l}  rectangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} T_{1,2}|
 *            |0       T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\C^{l, l}     upper triangular
 *        T_{2,2}\in\C^{k-l, k-l} upper triangular
 *        T_{1,2}\in\C^{l, k-l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2')
 *        = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2'
 *        
 *        Define T{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2}
 *        
 *        Then, we can define the matrix V as 
 *        V = |-------|
 *            |V_1 V_2|
 *            |-------|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V*T*V'
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{1,2}
 *
 *        Compute T_{1,1} recursively
 *
         CALL CLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, 
     $               TAU(L+1), T(L+1, L+1), LDT)
 *
 *        Compute T_{1,2} 
 *        T_{1,2} = V_{2,1}'
 *
         DO J = 1, L
            DO I = 1, K-L
               T(J, L+I) = CONJG(V(L+I, J))
            END DO
         END DO
      ELSE
         PREVLASTV = 1
         DO I = K, 1, -1
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = I, K
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( I.LT.K ) THEN
                  IF( LSAME( STOREV, 'C' ) ) THEN
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( LASTV, I ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
 *
                     CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I,
     $                           -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
     $                           1, ONE, T( I+1, I ), 1 )
                  ELSE
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( I, LASTV ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
 *
                     CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ),
     $                           V( I+1, J ), LDV, V( I, J ), LDV,
     $                           ONE, T( I+1, I ), LDT )
                  END IF
 *
 *                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
 *
                  CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
                  IF( I.GT.1 ) THEN
                     PREVLASTV = MIN( PREVLASTV, LASTV )
                  ELSE
                     PREVLASTV = LASTV
                  END IF
               END IF
               T( I, I ) = TAU( I )
            END IF
 *
 *        T_{1,2} = T_{1,2}*V_{2,2}
 *
         CALL CTRMM('Right', 'Lower', 'No transpose', 'Unit', L,
     $         K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT)

 *
 *        T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL CGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, 
     $         V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, T(1, L+1),
     $         LDT)
 *
 *        At this point, we have that T_{1,2} = V_1'*V_2
 *        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2}
 *        respectively.
 *
 *        T_{1,2} = -T_{1,1}*T_{1,2}
 *
         CALL CTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L,
     $         K-L, NEG_ONE, T, LDT, T(1, L+1), LDT)
 *
 *        T_{1,2} = T_{1,2}*T_{2,2}
 *
         CALL CTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, 
     $         K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT)

      ELSE IF(LQ) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |----------------------|
 *            |V_{1,1} V_{1,2} V{1,3}|
 *            |0       V_{2,2} V{2,3}|
 *            |----------------------|
 *
 *        V_{1,1}\in\C^{l,l}      unit upper triangular
 *        V_{1,2}\in\C^{l,k-l}    rectangular
 *        V_{1,3}\in\C^{l,n-k}    rectangular
 *        
 *        V_{2,2}\in\C^{k-l,k-l}  unit upper triangular
 *        V_{2,3}\in\C^{k-l,n-k}  rectangular
 *
 *        Where l = floor(k/2)
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} T_{1,2}|
 *            |0       T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\C^{l, l}     upper triangular
 *        T_{2,2}\in\C^{k-l, k-l} upper triangular
 *        T_{1,2}\in\C^{l, k-l}   rectangular
 *
 *        Then, consider the product:
 *        
 *        (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2)
 *        = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2
 *        
 *        Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2}
 *        
 *        Then, we can define the matrix V as 
 *        V = |---|
 *            |V_1|
 *            |V_2|
 *            |---|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V'*T*V
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{1,2}
 *
 *        Compute T_{1,1} recursively
 *
         CALL CLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, 
     $               TAU(L+1), T(L+1, L+1), LDT)

 *
 *        Compute T_{1,2}
 *        T_{1,2} = V_{1,2}
 *
         CALL CLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT)
 *
 *        T_{1,2} = T_{1,2}*V_{2,2}'
 *
         CALL CTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L,
     $      ONE, V(L+1, L+1), LDV, T(1, L+1), LDT)

 *
 *        T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL CGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE,
     $      V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT)
 *
 *        At this point, we have that T_{1,2} = V_1*V_2'
 *        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2}
 *        respectively.
 *
 *        T_{1,2} = -T_{1,1}*T_{1,2}
 *
         CALL CTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L,
     $      K-L, NEG_ONE, T, LDT, T(1, L+1), LDT)

 *
 *        T_{1,2} = T_{1,2}*T_{2,2}
 *
         CALL CTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L,
     $      K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT)
      ELSE IF(QL) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |---------------|
 *            |V_{1,1} V_{1,2}|
 *            |V_{2,1} V_{2,2}|
 *            |0       V_{3,2}|
 *            |---------------|
 *
 *        V_{1,1}\in\C^{n-k,k-l}  rectangular
 *        V_{2,1}\in\C^{k-l,k-l}  unit upper triangular
 *        
 *        V_{1,2}\in\C^{n-k,l}    rectangular
 *        V_{2,2}\in\C^{k-l,l}    rectangular
 *        V_{3,2}\in\C^{l,l}      unit upper triangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} 0      |
 *            |T_{2,1} T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular
 *        T_{2,2}\in\C^{l, l}     non-unit lower triangular
 *        T_{2,1}\in\C^{k-l, l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1')
 *        = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1'
 *        
 *        Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1}
 *        
 *        Then, we can define the matrix V as 
 *        V = |-------|
 *            |V_1 V_2|
 *            |-------|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V*T*V'
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{2,1}
 *
 *        Compute T_{1,1} recursively
 *
         CALL CLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL CLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV,
     $               TAU(K-L+1), T(K-L+1, K-L+1), LDT)
 *
 *        Compute T_{2,1}
 *        T_{2,1} = V_{2,2}'
 *
         DO J = 1, K-L
            DO I = 1, L
               T(K-L+I, J) = CONJG(V(N-K+J, K-L+I))
            END DO
         END DO
      END IF
      RETURN
 *
 *     End of CLARFT
 *        T_{2,1} = T_{2,1}*V_{2,1}
 *
         CALL CTRMM('Right', 'Upper', 'No transpose', 'Unit', L,
     $               K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL CGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE,
     $               V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1),
     $               LDT)
 *
 *        At this point, we have that T_{2,1} = V_2'*V_1
 *        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1}
 *        respectively.
 *
 *        T_{2,1} = -T_{2,2}*T_{2,1}
 *
         CALL CTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T(K-L+1, K-L+1), LDT,
     $               T(K-L+1, 1), LDT)
 *
      END
 *        T_{2,1} = T_{2,1}*T_{1,1}
 *
         CALL CTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L,
     $               K-L, ONE, T, LDT, T(K-L+1, 1), LDT)
      ELSE
 *
 *        Else means RQ case
 *
 *        Break V apart into 6 components
 *
 *        V = |-----------------------|
 *            |V_{1,1} V_{1,2} 0      |
 *            |V_{2,1} V_{2,2} V_{2,3}|
 *            |-----------------------|
 *
 *        V_{1,1}\in\C^{k-l,n-k}  rectangular
 *        V_{1,2}\in\C^{k-l,k-l}  unit lower triangular
 *
 *        V_{2,1}\in\C^{l,n-k}    rectangular
 *        V_{2,2}\in\C^{l,k-l}    rectangular
 *        V_{2,3}\in\C^{l,l}      unit lower triangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} 0      |
 *            |T_{2,1} T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular
 *        T_{2,2}\in\C^{l, l}     non-unit lower triangular
 *        T_{2,1}\in\C^{k-l, l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1)
 *        = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1
 *        
 *        Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1}
 *        
 *        Then, we can define the matrix V as 
 *        V = |---|
 *            |V_1|
 *            |V_2|
 *            |---|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V'*T*V
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{2,1}
 *
 *        Compute T_{1,1} recursively
 *
         CALL CLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL CLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV,
     $               TAU(K-L+1), T(K-L+1, K-L+1), LDT)
 *
 *        Compute T_{2,1}
 *        T_{2,1} = V_{2,2}
 *
         CALL CLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV, 
     $               T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = T_{2,1}*V_{1,2}'
 *
         CALL CTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L,
     $               ONE, V(1, N-K+1), LDV, T(K-L+1,1), LDT)

 *
 *        T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL CGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, 
     $               V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1),
     $               LDT)

 *
 *        At this point, we have that T_{2,1} = V_2*V_1'
 *        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1}
 *        respectively.
 *
 *        T_{2,1} = -T_{2,2}*T_{2,1}
 *
         CALL CTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, 
     $               T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = T_{2,1}*T_{1,1}
 *
         CALL CTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L,
     $               K-L, ONE, T, LDT, T(K-L+1, 1), LDT)
      END IF
      END SUBROUTINE
--- a/lapack-netlib/SRC/dlarft.f
+++ b/lapack-netlib/SRC/dlarft.f
@@ -18,7 +18,7 @@
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *       RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, STOREV
@@ -130,7 +130,7 @@
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup doubleOTHERauxiliary
 *> \ingroup larft
 *
 *> \par Further Details:
 *  =====================
@@ -159,165 +159,470 @@
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
      RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV,
     $                             TAU, T, LDT )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
 *        .. Scalar Arguments
 *
      CHARACTER          DIRECT, STOREV
      INTEGER            K, LDT, LDV, N
 *     ..
 *     .. Array Arguments ..
 *
      DOUBLE PRECISION   T( LDT, * ), TAU( * ), V( LDV, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 *     ..
 *
      DOUBLE PRECISION ONE, NEG_ONE, ZERO
      PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0)
 *
 *     .. Local Scalars ..
      INTEGER            I, J, PREVLASTV, LASTV
 *     ..
 *
      INTEGER           I,J,L
      LOGICAL           QR,LQ,QL,DIRF,COLV
 *
 *     .. External Subroutines ..
      EXTERNAL           DGEMV, DTRMV
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
 *
      EXTERNAL          DTRMM,DGEMM,DLACPY
 *
 *     .. External Functions..
 *
      LOGICAL           LSAME
      EXTERNAL          LSAME
 *     
 *     The general scheme used is inspired by the approach inside DGEQRT3
 *     which was (at the time of writing this code):
 *     Based on the algorithm of Elmroth and Gustavson,
 *     IBM J. Res. Develop. Vol 44 No. 4 July 2000.
 *     ..
 *     .. Executable Statements ..
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
      IF( LSAME( DIRECT, 'F' ) ) THEN
         PREVLASTV = N
         DO I = 1, K
            PREVLASTV = MAX( I, PREVLASTV )
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = 1, I
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( LSAME( STOREV, 'C' ) ) THEN
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( LASTV, I ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( I , J )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i)
 *
                  CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ),
     $                        V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE,
     $                        T( 1, I ), 1 )
               ELSE
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( I, LASTV ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( J , I )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T
 *
                  CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ),
     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE,
     $                        T( 1, I ), 1 )
               END IF
 *
 *              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
 *
               CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T,
     $                     LDT, T( 1, I ), 1 )
               T( I, I ) = TAU( I )
               IF( I.GT.1 ) THEN
                  PREVLASTV = MAX( PREVLASTV, LASTV )
               ELSE
                  PREVLASTV = LASTV
               END IF
            END IF
      IF(N.EQ.0.OR.K.EQ.0) THEN
         RETURN
      END IF
 *
 *     Base case
 *
      IF(N.EQ.1.OR.K.EQ.1) THEN
         T(1,1) = TAU(1)
         RETURN
      END IF
 *
 *     Beginning of executable statements
 *
      L = K / 2
 *
 *     Determine what kind of Q we need to compute
 *     We assume that if the user doesn't provide 'F' for DIRECT,
 *     then they meant to provide 'B' and if they don't provide
 *     'C' for STOREV, then they meant to provide 'R'
 *
      DIRF = LSAME(DIRECT,'F')
      COLV = LSAME(STOREV,'C')
 *
 *     QR happens when we have forward direction in column storage
 *
      QR = DIRF.AND.COLV
 *
 *     LQ happens when we have forward direction in row storage
 *
      LQ = DIRF.AND.(.NOT.COLV)
 *
 *     QL happens when we have backward direction in column storage
 *
      QL = (.NOT.DIRF).AND.COLV
 *
 *     The last case is RQ. Due to how we structured this, if the
 *     above 3 are false, then RQ must be true, so we never store 
 *     this
 *     RQ happens when we have backward direction in row storage
 *     RQ = (.NOT.DIRF).AND.(.NOT.COLV)
 *
      IF(QR) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |---------------|
 *            |V_{1,1} 0      |
 *            |V_{2,1} V_{2,2}|
 *            |V_{3,1} V_{3,2}|
 *            |---------------|
 *
 *        V_{1,1}\in\R^{l,l}      unit lower triangular
 *        V_{2,1}\in\R^{k-l,l}    rectangular
 *        V_{3,1}\in\R^{n-k,l}    rectangular
 *        
 *        V_{2,2}\in\R^{k-l,k-l}  unit lower triangular
 *        V_{3,2}\in\R^{n-k,k-l}  rectangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} T_{1,2}|
 *            |0       T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\R^{l, l}     upper triangular
 *        T_{2,2}\in\R^{k-l, k-l} upper triangular
 *        T_{1,2}\in\R^{l, k-l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2')
 *        = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2'
 *        
 *        Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2}
 *        
 *        Then, we can define the matrix V as 
 *        V = |-------|
 *            |V_1 V_2|
 *            |-------|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V*T*V'
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{1,2}
 *
 *        Compute T_{1,1} recursively
 *
         CALL DLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, 
     $               TAU(L+1), T(L+1, L+1), LDT)
 *
 *        Compute T_{1,2} 
 *        T_{1,2} = V_{2,1}'
 *
         DO J = 1, L
            DO I = 1, K-L
               T(J, L+I) = V(L+I, J)
            END DO
         END DO
      ELSE
         PREVLASTV = 1
         DO I = K, 1, -1
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = I, K
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( I.LT.K ) THEN
                  IF( LSAME( STOREV, 'C' ) ) THEN
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( LASTV, I ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( N-K+I , J )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i)
 *
                     CALL DGEMV( 'Transpose', N-K+I-J, K-I, -TAU( I ),
     $                           V( J, I+1 ), LDV, V( J, I ), 1, ONE,
     $                           T( I+1, I ), 1 )
                  ELSE
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( I, LASTV ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T
 *
                     CALL DGEMV( 'No transpose', K-I, N-K+I-J,
     $                    -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV,
     $                    ONE, T( I+1, I ), 1 )
                  END IF
 *
 *                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
 *
                  CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
                  IF( I.GT.1 ) THEN
                     PREVLASTV = MIN( PREVLASTV, LASTV )
                  ELSE
                     PREVLASTV = LASTV
                  END IF
               END IF
               T( I, I ) = TAU( I )
            END IF
 *
 *        T_{1,2} = T_{1,2}*V_{2,2}
 *
         CALL DTRMM('Right', 'Lower', 'No transpose', 'Unit', L,
     $               K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT)

 *
 *        T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, 
     $               V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, 
     $               T(1, L+1), LDT)
 *
 *        At this point, we have that T_{1,2} = V_1'*V_2
 *        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2}
 *        respectively.
 *
 *        T_{1,2} = -T_{1,1}*T_{1,2}
 *
         CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T, LDT, T(1, L+1), LDT)
 *
 *        T_{1,2} = T_{1,2}*T_{2,2}
 *
         CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, 
     $               K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT)

      ELSE IF(LQ) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |----------------------|
 *            |V_{1,1} V_{1,2} V{1,3}|
 *            |0       V_{2,2} V{2,3}|
 *            |----------------------|
 *
 *        V_{1,1}\in\R^{l,l}      unit upper triangular
 *        V_{1,2}\in\R^{l,k-l}    rectangular
 *        V_{1,3}\in\R^{l,n-k}    rectangular
 *        
 *        V_{2,2}\in\R^{k-l,k-l}  unit upper triangular
 *        V_{2,3}\in\R^{k-l,n-k}  rectangular
 *
 *        Where l = floor(k/2)
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} T_{1,2}|
 *            |0       T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\R^{l, l}     upper triangular
 *        T_{2,2}\in\R^{k-l, k-l} upper triangular
 *        T_{1,2}\in\R^{l, k-l}   rectangular
 *
 *        Then, consider the product:
 *        
 *        (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2)
 *        = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2
 *        
 *        Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2}
 *        
 *        Then, we can define the matrix V as 
 *        V = |---|
 *            |V_1|
 *            |V_2|
 *            |---|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V'*T*V
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{1,2}
 *
 *        Compute T_{1,1} recursively
 *
         CALL DLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, 
     $      TAU(L+1), T(L+1, L+1), LDT)

 *
 *        Compute T_{1,2}
 *        T_{1,2} = V_{1,2}
 *
         CALL DLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT)
 *
 *        T_{1,2} = T_{1,2}*V_{2,2}'
 *
         CALL DTRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L,
     $               ONE, V(L+1, L+1), LDV, T(1, L+1), LDT)

 *
 *        T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE,
     $               V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, 
     $               T(1, L+1), LDT)
 *
 *        At this point, we have that T_{1,2} = V_1*V_2'
 *        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2}
 *        respectively.
 *
 *        T_{1,2} = -T_{1,1}*T_{1,2}
 *
         CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T, LDT, T(1, L+1), LDT)

 *
 *        T_{1,2} = T_{1,2}*T_{2,2}
 *
         CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L,
     $               K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT)
      ELSE IF(QL) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |---------------|
 *            |V_{1,1} V_{1,2}|
 *            |V_{2,1} V_{2,2}|
 *            |0       V_{3,2}|
 *            |---------------|
 *
 *        V_{1,1}\in\R^{n-k,k-l}  rectangular
 *        V_{2,1}\in\R^{k-l,k-l}  unit upper triangular
 *        
 *        V_{1,2}\in\R^{n-k,l}    rectangular
 *        V_{2,2}\in\R^{k-l,l}    rectangular
 *        V_{3,2}\in\R^{l,l}      unit upper triangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} 0      |
 *            |T_{2,1} T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular
 *        T_{2,2}\in\R^{l, l}     non-unit lower triangular
 *        T_{2,1}\in\R^{k-l, l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1')
 *        = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1'
 *        
 *        Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1}
 *        
 *        Then, we can define the matrix V as 
 *        V = |-------|
 *            |V_1 V_2|
 *            |-------|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V*T*V'
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{2,1}
 *
 *        Compute T_{1,1} recursively
 *
         CALL DLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL DLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV,
     $               TAU(K-L+1), T(K-L+1, K-L+1), LDT)
 *
 *        Compute T_{2,1}
 *        T_{2,1} = V_{2,2}'
 *
         DO J = 1, K-L
            DO I = 1, L
               T(K-L+I, J) = V(N-K+J, K-L+I)
            END DO
         END DO
      END IF
      RETURN
 *
 *     End of DLARFT
 *        T_{2,1} = T_{2,1}*V_{2,1}
 *
         CALL DTRMM('Right', 'Upper', 'No transpose', 'Unit', L,
     $               K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE,
     $               V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1),
     $               LDT)
 *
 *        At this point, we have that T_{2,1} = V_2'*V_1
 *        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1}
 *        respectively.
 *
 *        T_{2,1} = -T_{2,2}*T_{2,1}
 *
         CALL DTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T(K-L+1, K-L+1), LDT,
     $               T(K-L+1, 1), LDT)
 *
      END
 *        T_{2,1} = T_{2,1}*T_{1,1}
 *
         CALL DTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L,
     $               K-L, ONE, T, LDT, T(K-L+1, 1), LDT)
      ELSE
 *
 *        Else means RQ case
 *
 *        Break V apart into 6 components
 *
 *        V = |-----------------------|
 *            |V_{1,1} V_{1,2} 0      |
 *            |V_{2,1} V_{2,2} V_{2,3}|
 *            |-----------------------|
 *
 *        V_{1,1}\in\R^{k-l,n-k}  rectangular
 *        V_{1,2}\in\R^{k-l,k-l}  unit lower triangular
 *
 *        V_{2,1}\in\R^{l,n-k}    rectangular
 *        V_{2,2}\in\R^{l,k-l}    rectangular
 *        V_{2,3}\in\R^{l,l}      unit lower triangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} 0      |
 *            |T_{2,1} T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular
 *        T_{2,2}\in\R^{l, l}     non-unit lower triangular
 *        T_{2,1}\in\R^{k-l, l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1)
 *        = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1
 *        
 *        Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1}
 *        
 *        Then, we can define the matrix V as 
 *        V = |---|
 *            |V_1|
 *            |V_2|
 *            |---|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V'*T*V
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{2,1}
 *
 *        Compute T_{1,1} recursively
 *
         CALL DLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL DLARFT(DIRECT, STOREV, N, L, V(K-L+1, 1), LDV,
     $               TAU(K-L+1), T(K-L+1, K-L+1), LDT)
 *
 *        Compute T_{2,1}
 *        T_{2,1} = V_{2,2}
 *
         CALL DLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV,
     $               T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = T_{2,1}*V_{1,2}'
 *
         CALL DTRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L,
     $               ONE, V(1, N-K+1), LDV, T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} 
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, 
     $               V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1),
     $               LDT)

 *
 *        At this point, we have that T_{2,1} = V_2*V_1'
 *        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1}
 *        respectively.
 *
 *        T_{2,1} = -T_{2,2}*T_{2,1}
 *
         CALL DTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T(K-L+1, K-L+1), LDT,
     $               T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = T_{2,1}*T_{1,1}
 *
         CALL DTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L,
     $               K-L, ONE, T, LDT, T(K-L+1, 1), LDT)
      END IF
      END SUBROUTINE
--- a/lapack-netlib/SRC/slarft.f
+++ b/lapack-netlib/SRC/slarft.f
@@ -18,7 +18,7 @@
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *       RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, STOREV
@@ -127,10 +127,10 @@
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author Johnathan Rhyne, Univ. of Colorado Denver (original author, 2024)
 *> \author NAG Ltd.
 *
 *> \ingroup realOTHERauxiliary
 *> \ingroup larft
 *
 *> \par Further Details:
 *  =====================
@@ -159,165 +159,470 @@
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
      RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV,
     $                             TAU, T, LDT )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
 *        .. Scalar Arguments
 *
      CHARACTER          DIRECT, STOREV
      INTEGER            K, LDT, LDV, N
 *     ..
 *     .. Array Arguments ..
 *
      REAL               T( LDT, * ), TAU( * ), V( LDV, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
      REAL               ONE, ZERO
      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
 *     ..
 *
      REAL               ONE, NEG_ONE, ZERO
      PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE=-1.0E+0)
 *
 *     .. Local Scalars ..
      INTEGER            I, J, PREVLASTV, LASTV
 *     ..
 *
      INTEGER            I,J,L
      LOGICAL            QR,LQ,QL,DIRF,COLV
 *
 *     .. External Subroutines ..
      EXTERNAL           SGEMV, STRMV
 *     ..
 *     .. External Functions ..
 *
      EXTERNAL           STRMM,SGEMM,SLACPY
 *
 *     .. External Functions..
 *
      LOGICAL            LSAME
      EXTERNAL           LSAME
 *     
 *     The general scheme used is inspired by the approach inside DGEQRT3
 *     which was (at the time of writing this code):
 *     Based on the algorithm of Elmroth and Gustavson,
 *     IBM J. Res. Develop. Vol 44 No. 4 July 2000.
 *     ..
 *     .. Executable Statements ..
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
      IF( LSAME( DIRECT, 'F' ) ) THEN
         PREVLASTV = N
         DO I = 1, K
            PREVLASTV = MAX( I, PREVLASTV )
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = 1, I
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( LSAME( STOREV, 'C' ) ) THEN
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( LASTV, I ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( I , J )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i)
 *
                  CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ),
     $                        V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE,
     $                        T( 1, I ), 1 )
               ELSE
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( I, LASTV ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( J , I )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T
 *
                  CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ),
     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
     $                        ONE, T( 1, I ), 1 )
               END IF
 *
 *              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
 *
               CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T,
     $                     LDT, T( 1, I ), 1 )
               T( I, I ) = TAU( I )
               IF( I.GT.1 ) THEN
                  PREVLASTV = MAX( PREVLASTV, LASTV )
               ELSE
                  PREVLASTV = LASTV
               END IF
            END IF
      IF(N.EQ.0.OR.K.EQ.0) THEN
         RETURN
      END IF
 *
 *     Base case
 *
      IF(N.EQ.1.OR.K.EQ.1) THEN
         T(1,1) = TAU(1)
         RETURN
      END IF
 *
 *     Beginning of executable statements
 *
      L = K / 2
 *
 *     Determine what kind of Q we need to compute
 *     We assume that if the user doesn't provide 'F' for DIRECT,
 *     then they meant to provide 'B' and if they don't provide
 *     'C' for STOREV, then they meant to provide 'R'
 *
      DIRF = LSAME(DIRECT,'F')
      COLV = LSAME(STOREV,'C')
 *
 *     QR happens when we have forward direction in column storage
 *
      QR = DIRF.AND.COLV
 *
 *     LQ happens when we have forward direction in row storage
 *
      LQ = DIRF.AND.(.NOT.COLV)
 *
 *     QL happens when we have backward direction in column storage
 *
      QL = (.NOT.DIRF).AND.COLV
 *
 *     The last case is RQ. Due to how we structured this, if the
 *     above 3 are false, then RQ must be true, so we never store 
 *     this
 *     RQ happens when we have backward direction in row storage
 *     RQ = (.NOT.DIRF).AND.(.NOT.COLV)
 *
      IF(QR) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |---------------|
 *            |V_{1,1} 0      |
 *            |V_{2,1} V_{2,2}|
 *            |V_{3,1} V_{3,2}|
 *            |---------------|
 *
 *        V_{1,1}\in\R^{l,l}      unit lower triangular
 *        V_{2,1}\in\R^{k-l,l}    rectangular
 *        V_{3,1}\in\R^{n-k,l}    rectangular
 *        
 *        V_{2,2}\in\R^{k-l,k-l}  unit lower triangular
 *        V_{3,2}\in\R^{n-k,k-l}  rectangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} T_{1,2}|
 *            |0       T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\R^{l, l}     upper triangular
 *        T_{2,2}\in\R^{k-l, k-l} upper triangular
 *        T_{1,2}\in\R^{l, k-l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2')
 *        = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2'
 *        
 *        Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2}
 *        
 *        Then, we can define the matrix V as 
 *        V = |-------|
 *            |V_1 V_2|
 *            |-------|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V*T*V'
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{1,2}
 *
 *        Compute T_{1,1} recursively
 *
         CALL SLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, 
     $               TAU(L+1), T(L+1, L+1), LDT)
 *
 *        Compute T_{1,2} 
 *        T_{1,2} = V_{2,1}'
 *
         DO J = 1, L
            DO I = 1, K-L
               T(J, L+I) = V(L+I, J)
            END DO
         END DO
      ELSE
         PREVLASTV = 1
         DO I = K, 1, -1
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = I, K
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( I.LT.K ) THEN
                  IF( LSAME( STOREV, 'C' ) ) THEN
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( LASTV, I ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( N-K+I , J )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i)
 *
                     CALL SGEMV( 'Transpose', N-K+I-J, K-I, -TAU( I ),
     $                           V( J, I+1 ), LDV, V( J, I ), 1, ONE,
     $                           T( I+1, I ), 1 )
                  ELSE
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( I, LASTV ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T
 *
                     CALL SGEMV( 'No transpose', K-I, N-K+I-J,
     $                    -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV,
     $                    ONE, T( I+1, I ), 1 )
                  END IF
 *
 *                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
 *
                  CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
                  IF( I.GT.1 ) THEN
                     PREVLASTV = MIN( PREVLASTV, LASTV )
                  ELSE
                     PREVLASTV = LASTV
                  END IF
               END IF
               T( I, I ) = TAU( I )
            END IF
 *
 *        T_{1,2} = T_{1,2}*V_{2,2}
 *
         CALL STRMM('Right', 'Lower', 'No transpose', 'Unit', L,
     $               K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT)

 *
 *        T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL SGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, 
     $               V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, 
     $               T(1, L+1), LDT)
 *
 *        At this point, we have that T_{1,2} = V_1'*V_2
 *        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2}
 *        respectively.
 *
 *        T_{1,2} = -T_{1,1}*T_{1,2}
 *
         CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T, LDT, T(1, L+1), LDT)
 *
 *        T_{1,2} = T_{1,2}*T_{2,2}
 *
         CALL STRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, 
     $               K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT)

      ELSE IF(LQ) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |----------------------|
 *            |V_{1,1} V_{1,2} V{1,3}|
 *            |0       V_{2,2} V{2,3}|
 *            |----------------------|
 *
 *        V_{1,1}\in\R^{l,l}      unit upper triangular
 *        V_{1,2}\in\R^{l,k-l}    rectangular
 *        V_{1,3}\in\R^{l,n-k}    rectangular
 *        
 *        V_{2,2}\in\R^{k-l,k-l}  unit upper triangular
 *        V_{2,3}\in\R^{k-l,n-k}  rectangular
 *
 *        Where l = floor(k/2)
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} T_{1,2}|
 *            |0       T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\R^{l, l}     upper triangular
 *        T_{2,2}\in\R^{k-l, k-l} upper triangular
 *        T_{1,2}\in\R^{l, k-l}   rectangular
 *
 *        Then, consider the product:
 *        
 *        (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2)
 *        = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2
 *        
 *        Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2}
 *        
 *        Then, we can define the matrix V as 
 *        V = |---|
 *            |V_1|
 *            |V_2|
 *            |---|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V'*T*V
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{1,2}
 *
 *        Compute T_{1,1} recursively
 *
         CALL SLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, 
     $               TAU(L+1), T(L+1, L+1), LDT)

 *
 *        Compute T_{1,2}
 *        T_{1,2} = V_{1,2}
 *
         CALL SLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT)
 *
 *        T_{1,2} = T_{1,2}*V_{2,2}'
 *
         CALL STRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L,
     $               ONE, V(L+1, L+1), LDV, T(1, L+1), LDT)

 *
 *        T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL SGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE,
     $               V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, 
     $               T(1, L+1), LDT)
 *
 *        At this point, we have that T_{1,2} = V_1*V_2'
 *        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2}
 *        respectively.
 *
 *        T_{1,2} = -T_{1,1}*T_{1,2}
 *
         CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T, LDT, T(1, L+1), LDT)

 *
 *        T_{1,2} = T_{1,2}*T_{2,2}
 *
         CALL STRMM('Right', 'Upper', 'No transpose', 'Non-unit', L,
     $               K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT)
      ELSE IF(QL) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |---------------|
 *            |V_{1,1} V_{1,2}|
 *            |V_{2,1} V_{2,2}|
 *            |0       V_{3,2}|
 *            |---------------|
 *
 *        V_{1,1}\in\R^{n-k,k-l}  rectangular
 *        V_{2,1}\in\R^{k-l,k-l}  unit upper triangular
 *        
 *        V_{1,2}\in\R^{n-k,l}    rectangular
 *        V_{2,2}\in\R^{k-l,l}    rectangular
 *        V_{3,2}\in\R^{l,l}      unit upper triangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} 0      |
 *            |T_{2,1} T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular
 *        T_{2,2}\in\R^{l, l}     non-unit lower triangular
 *        T_{2,1}\in\R^{k-l, l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1')
 *        = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1'
 *        
 *        Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1}
 *        
 *        Then, we can define the matrix V as 
 *        V = |-------|
 *            |V_1 V_2|
 *            |-------|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V*T*V'
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{2,1}
 *
 *        Compute T_{1,1} recursively
 *
         CALL SLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL SLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV,
     $               TAU(K-L+1), T(K-L+1, K-L+1), LDT)
 *
 *        Compute T_{2,1}
 *        T_{2,1} = V_{2,2}'
 *
         DO J = 1, K-L
            DO I = 1, L
               T(K-L+I, J) = V(N-K+J, K-L+I)
            END DO
         END DO
      END IF
      RETURN
 *
 *     End of SLARFT
 *        T_{2,1} = T_{2,1}*V_{2,1}
 *
         CALL STRMM('Right', 'Upper', 'No transpose', 'Unit', L,
     $               K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL SGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE,
     $               V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1), 
     $               LDT)
 *
 *        At this point, we have that T_{2,1} = V_2'*V_1
 *        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1}
 *        respectively.
 *
 *        T_{2,1} = -T_{2,2}*T_{2,1}
 *
         CALL STRMM('Left', 'Lower', 'No transpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, 
     $               T(K-L+1, 1), LDT)
 *
      END
 *        T_{2,1} = T_{2,1}*T_{1,1}
 *
         CALL STRMM('Right', 'Lower', 'No transpose', 'Non-unit', L,
     $               K-L, ONE, T, LDT, T(K-L+1, 1), LDT)
      ELSE
 *
 *        Else means RQ case
 *
 *        Break V apart into 6 components
 *
 *        V = |-----------------------|
 *            |V_{1,1} V_{1,2} 0      |
 *            |V_{2,1} V_{2,2} V_{2,3}|
 *            |-----------------------|
 *
 *        V_{1,1}\in\R^{k-l,n-k}  rectangular
 *        V_{1,2}\in\R^{k-l,k-l}  unit lower triangular
 *
 *        V_{2,1}\in\R^{l,n-k}    rectangular
 *        V_{2,2}\in\R^{l,k-l}    rectangular
 *        V_{2,3}\in\R^{l,l}      unit lower triangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} 0      |
 *            |T_{2,1} T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular
 *        T_{2,2}\in\R^{l, l}     non-unit lower triangular
 *        T_{2,1}\in\R^{k-l, l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1)
 *        = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1
 *        
 *        Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1}
 *        
 *        Then, we can define the matrix V as 
 *        V = |---|
 *            |V_1|
 *            |V_2|
 *            |---|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V'TV
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{2,1}
 *
 *        Compute T_{1,1} recursively
 *
         CALL SLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL SLARFT(DIRECT, STOREV, N, L, V(K-L+1, 1), LDV,
     $               TAU(K-L+1), T(K-L+1, K-L+1), LDT)
 *
 *        Compute T_{2,1}
 *        T_{2,1} = V_{2,2}
 *
         CALL SLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV,
     $      T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = T_{2,1}*V_{1,2}'
 *
         CALL STRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L,
     $               ONE, V(1, N-K+1), LDV, T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} 
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL SGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, 
     $               V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1),
     $               LDT)

 *
 *        At this point, we have that T_{2,1} = V_2*V_1'
 *        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1}
 *        respectively.
 *
 *        T_{2,1} = -T_{2,2}*T_{2,1}
 *
         CALL STRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, 
     $               T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = T_{2,1}*T_{1,1}
 *
         CALL STRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L,
     $               K-L, ONE, T, LDT, T(K-L+1, 1), LDT)
      END IF
      END SUBROUTINE
--- a/lapack-netlib/SRC/zlarft.f
+++ b/lapack-netlib/SRC/zlarft.f
@@ -18,7 +18,7 @@
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *       RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
 *
 *       .. Scalar Arguments ..
 *       CHARACTER          DIRECT, STOREV
@@ -130,7 +130,7 @@
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup complex16OTHERauxiliary
 *> \ingroup larft
 *
 *> \par Further Details:
 *  =====================
@@ -159,166 +159,474 @@
 *> \endverbatim
 *>
 *  =====================================================================
      SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
      RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV,
     $                             TAU, T, LDT )
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
      CHARACTER          DIRECT, STOREV
      INTEGER            K, LDT, LDV, N
 *        .. Scalar Arguments
 *
      CHARACTER         DIRECT, STOREV
      INTEGER           K, LDT, LDV, N
 *     ..
 *     .. Array Arguments ..
      COMPLEX*16         T( LDT, * ), TAU( * ), V( LDV, * )
 *     ..
 *
 *  =====================================================================
      COMPLEX*16        T( LDT, * ), TAU( * ), V( LDV, * )
 *     ..
 *
 *     .. Parameters ..
      COMPLEX*16         ONE, ZERO
      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
 *     ..
 *
      COMPLEX*16        ONE, NEG_ONE, ZERO
      PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0)
 *
 *     .. Local Scalars ..
      INTEGER            I, J, PREVLASTV, LASTV
 *     ..
 *
      INTEGER           I,J,L
      LOGICAL           QR,LQ,QL,DIRF,COLV
 *
 *     .. External Subroutines ..
      EXTERNAL           ZGEMV, ZTRMV, ZGEMM
 *     ..
 *     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
 *
      EXTERNAL          ZTRMM,ZGEMM,ZLACPY
 *
 *     .. External Functions..
 *
      LOGICAL           LSAME
      EXTERNAL          LSAME
 *
 *     .. Intrinsic Functions..
 *
      INTRINSIC         CONJG
 *     
 *     The general scheme used is inspired by the approach inside DGEQRT3
 *     which was (at the time of writing this code):
 *     Based on the algorithm of Elmroth and Gustavson,
 *     IBM J. Res. Develop. Vol 44 No. 4 July 2000.
 *     ..
 *     .. Executable Statements ..
 *
 *     Quick return if possible
 *
      IF( N.EQ.0 )
     $   RETURN
 *
      IF( LSAME( DIRECT, 'F' ) ) THEN
         PREVLASTV = N
         DO I = 1, K
            PREVLASTV = MAX( PREVLASTV, I )
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = 1, I
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( LSAME( STOREV, 'C' ) ) THEN
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( LASTV, I ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
 *
                  CALL ZGEMV( 'Conjugate transpose', J-I, I-1,
     $                        -TAU( I ), V( I+1, 1 ), LDV,
     $                        V( I+1, I ), 1, ONE, T( 1, I ), 1 )
               ELSE
 *                 Skip any trailing zeros.
                  DO LASTV = N, I+1, -1
                     IF( V( I, LASTV ).NE.ZERO ) EXIT
                  END DO
                  DO J = 1, I-1
                     T( J, I ) = -TAU( I ) * V( J , I )
                  END DO
                  J = MIN( LASTV, PREVLASTV )
 *
 *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
 *
                  CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
     $                        ONE, T( 1, I ), LDT )
               END IF
 *
 *              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
 *
               CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T,
     $                     LDT, T( 1, I ), 1 )
               T( I, I ) = TAU( I )
               IF( I.GT.1 ) THEN
                  PREVLASTV = MAX( PREVLASTV, LASTV )
               ELSE
                  PREVLASTV = LASTV
               END IF
             END IF
      IF(N.EQ.0.OR.K.EQ.0) THEN
         RETURN
      END IF
 *
 *     Base case
 *
      IF(N.EQ.1.OR.K.EQ.1) THEN
         T(1,1) = TAU(1)
         RETURN
      END IF
 *
 *     Beginning of executable statements
 *
      L = K / 2
 *
 *     Determine what kind of Q we need to compute
 *     We assume that if the user doesn't provide 'F' for DIRECT,
 *     then they meant to provide 'B' and if they don't provide
 *     'C' for STOREV, then they meant to provide 'R'
 *
      DIRF = LSAME(DIRECT,'F')
      COLV = LSAME(STOREV,'C')
 *
 *     QR happens when we have forward direction in column storage
 *
      QR = DIRF.AND.COLV
 *
 *     LQ happens when we have forward direction in row storage
 *
      LQ = DIRF.AND.(.NOT.COLV)
 *
 *     QL happens when we have backward direction in column storage
 *
      QL = (.NOT.DIRF).AND.COLV
 *
 *     The last case is RQ. Due to how we structured this, if the
 *     above 3 are false, then RQ must be true, so we never store 
 *     this
 *     RQ happens when we have backward direction in row storage
 *     RQ = (.NOT.DIRF).AND.(.NOT.COLV)
 *
      IF(QR) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |---------------|
 *            |V_{1,1} 0      |
 *            |V_{2,1} V_{2,2}|
 *            |V_{3,1} V_{3,2}|
 *            |---------------|
 *
 *        V_{1,1}\in\C^{l,l}      unit lower triangular
 *        V_{2,1}\in\C^{k-l,l}    rectangular
 *        V_{3,1}\in\C^{n-k,l}    rectangular
 *        
 *        V_{2,2}\in\C^{k-l,k-l}  unit lower triangular
 *        V_{3,2}\in\C^{n-k,k-l}  rectangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} T_{1,2}|
 *            |0       T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\C^{l, l}     upper triangular
 *        T_{2,2}\in\C^{k-l, k-l} upper triangular
 *        T_{1,2}\in\C^{l, k-l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2')
 *        = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2'
 *        
 *        Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2}
 *        
 *        Then, we can define the matrix V as 
 *        V = |-------|
 *            |V_1 V_2|
 *            |-------|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V*T*V'
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{1,2}
 *
 *        Compute T_{1,1} recursively
 *
         CALL ZLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, 
     $               TAU(L+1), T(L+1, L+1), LDT)
 *
 *        Compute T_{1,2} 
 *        T_{1,2} = V_{2,1}'
 *
         DO J = 1, L
            DO I = 1, K-L
               T(J, L+I) = CONJG(V(L+I, J))
            END DO
         END DO
      ELSE
         PREVLASTV = 1
         DO I = K, 1, -1
            IF( TAU( I ).EQ.ZERO ) THEN
 *
 *              H(i)  =  I
 *
               DO J = I, K
                  T( J, I ) = ZERO
               END DO
            ELSE
 *
 *              general case
 *
               IF( I.LT.K ) THEN
                  IF( LSAME( STOREV, 'C' ) ) THEN
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( LASTV, I ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
 *
                     CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I,
     $                           -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
     $                           1, ONE, T( I+1, I ), 1 )
                  ELSE
 *                    Skip any leading zeros.
                     DO LASTV = 1, I-1
                        IF( V( I, LASTV ).NE.ZERO ) EXIT
                     END DO
                     DO J = I+1, K
                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
                     END DO
                     J = MAX( LASTV, PREVLASTV )
 *
 *                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
 *
                     CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ),
     $                           V( I+1, J ), LDV, V( I, J ), LDV,
     $                           ONE, T( I+1, I ), LDT )
                  END IF
 *
 *                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
 *
                  CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
                  IF( I.GT.1 ) THEN
                     PREVLASTV = MIN( PREVLASTV, LASTV )
                  ELSE
                     PREVLASTV = LASTV
                  END IF
               END IF
               T( I, I ) = TAU( I )
            END IF
 *
 *        T_{1,2} = T_{1,2}*V_{2,2}
 *
         CALL ZTRMM('Right', 'Lower', 'No transpose', 'Unit', L,
     $               K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT)

 *
 *        T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL ZGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, 
     $               V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, 
     $               T(1, L+1), LDT)
 *
 *        At this point, we have that T_{1,2} = V_1'*V_2
 *        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2}
 *        respectively.
 *
 *        T_{1,2} = -T_{1,1}*T_{1,2}
 *
         CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T, LDT, T(1, L+1), LDT)
 *
 *        T_{1,2} = T_{1,2}*T_{2,2}
 *
         CALL ZTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, 
     $               K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT)

      ELSE IF(LQ) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |----------------------|
 *            |V_{1,1} V_{1,2} V{1,3}|
 *            |0       V_{2,2} V{2,3}|
 *            |----------------------|
 *
 *        V_{1,1}\in\C^{l,l}      unit upper triangular
 *        V_{1,2}\in\C^{l,k-l}    rectangular
 *        V_{1,3}\in\C^{l,n-k}    rectangular
 *        
 *        V_{2,2}\in\C^{k-l,k-l}  unit upper triangular
 *        V_{2,3}\in\C^{k-l,n-k}  rectangular
 *
 *        Where l = floor(k/2)
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} T_{1,2}|
 *            |0       T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\C^{l, l}         upper triangular
 *        T_{2,2}\in\C^{k-l, k-l}     upper triangular
 *        T_{1,2}\in\C^{l, k-l}       rectangular
 *
 *        Then, consider the product:
 *        
 *        (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2)
 *        = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2
 *        
 *        Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2}
 *        
 *        Then, we can define the matrix V as 
 *        V = |---|
 *            |V_1|
 *            |V_2|
 *            |---|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V'*T*V
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{1,2}
 *
 *        Compute T_{1,1} recursively
 *
         CALL ZLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, 
     $               TAU(L+1), T(L+1, L+1), LDT)

 *
 *        Compute T_{1,2}
 *        T_{1,2} = V_{1,2}
 *
         CALL ZLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT)
 *
 *        T_{1,2} = T_{1,2}*V_{2,2}'
 *
         CALL ZTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L,
     $               ONE, V(L+1, L+1), LDV, T(1, L+1), LDT)

 *
 *        T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL ZGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE,
     $               V(1, K+1), LDV, V(L+1, K+1), LDV, ONE,
     $               T(1, L+1), LDT)
 *
 *        At this point, we have that T_{1,2} = V_1*V_2'
 *        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2}
 *        respectively.
 *
 *        T_{1,2} = -T_{1,1}*T_{1,2}
 *
         CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T, LDT, T(1, L+1), LDT)

 *
 *        T_{1,2} = T_{1,2}*T_{2,2}
 *
         CALL ZTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L,
     $               K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT)
      ELSE IF(QL) THEN
 *
 *        Break V apart into 6 components
 *
 *        V = |---------------|
 *            |V_{1,1} V_{1,2}|
 *            |V_{2,1} V_{2,2}|
 *            |0       V_{3,2}|
 *            |---------------|
 *
 *        V_{1,1}\in\C^{n-k,k-l}  rectangular
 *        V_{2,1}\in\C^{k-l,k-l}  unit upper triangular
 *        
 *        V_{1,2}\in\C^{n-k,l}    rectangular
 *        V_{2,2}\in\C^{k-l,l}    rectangular
 *        V_{3,2}\in\C^{l,l}      unit upper triangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} 0      |
 *            |T_{2,1} T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular
 *        T_{2,2}\in\C^{l, l}     non-unit lower triangular
 *        T_{2,1}\in\C^{k-l, l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1')
 *        = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1'
 *        
 *        Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1}
 *        
 *        Then, we can define the matrix V as 
 *        V = |-------|
 *            |V_1 V_2|
 *            |-------|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V*T*V'
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{2,1}
 *
 *        Compute T_{1,1} recursively
 *
         CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL ZLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV,
     $               TAU(K-L+1), T(K-L+1, K-L+1), LDT)
 *
 *        Compute T_{2,1}
 *        T_{2,1} = V_{2,2}'
 *
         DO J = 1, K-L
            DO I = 1, L
               T(K-L+I, J) = CONJG(V(N-K+J, K-L+I))
            END DO
         END DO
      END IF
      RETURN
 *
 *     End of ZLARFT
 *        T_{2,1} = T_{2,1}*V_{2,1}
 *
         CALL ZTRMM('Right', 'Upper', 'No transpose', 'Unit', L,
     $               K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1}
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL ZGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE,
     $               V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1),
     $               LDT)
 *
 *        At this point, we have that T_{2,1} = V_2'*V_1
 *        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1}
 *        respectively.
 *
 *        T_{2,1} = -T_{2,2}*T_{2,1}
 *
         CALL ZTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T(K-L+1, K-L+1), LDT,
     $               T(K-L+1, 1), LDT)
 *
      END
 *        T_{2,1} = T_{2,1}*T_{1,1}
 *
         CALL ZTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L,
     $               K-L, ONE, T, LDT, T(K-L+1, 1), LDT)
      ELSE
 *
 *        Else means RQ case
 *
 *        Break V apart into 6 components
 *
 *        V = |-----------------------|
 *            |V_{1,1} V_{1,2} 0      |
 *            |V_{2,1} V_{2,2} V_{2,3}|
 *            |-----------------------|
 *
 *        V_{1,1}\in\C^{k-l,n-k}  rectangular
 *        V_{1,2}\in\C^{k-l,k-l}  unit lower triangular
 *
 *        V_{2,1}\in\C^{l,n-k}    rectangular
 *        V_{2,2}\in\C^{l,k-l}    rectangular
 *        V_{2,3}\in\C^{l,l}      unit lower triangular
 *
 *        We will construct the T matrix 
 *        T = |---------------|
 *            |T_{1,1} 0      |
 *            |T_{2,1} T_{2,2}|
 *            |---------------|
 *
 *        T is the triangular factor obtained from block reflectors. 
 *        To motivate the structure, assume we have already computed T_{1,1}
 *        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2
 *
 *        T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular
 *        T_{2,2}\in\C^{l, l}     non-unit lower triangular
 *        T_{2,1}\in\C^{k-l, l}   rectangular
 *
 *        Where l = floor(k/2)
 *
 *        Then, consider the product:
 *        
 *        (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1)
 *        = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1
 *        
 *        Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1}
 *        
 *        Then, we can define the matrix V as 
 *        V = |---|
 *            |V_1|
 *            |V_2|
 *            |---|
 *        
 *        So, our product is equivalent to the matrix product
 *        I - V'*T*V
 *        This means, we can compute T_{1,1} and T_{2,2}, then use this information
 *        to compute T_{2,1}
 *
 *        Compute T_{1,1} recursively
 *
         CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT)
 *
 *        Compute T_{2,2} recursively
 *
         CALL ZLARFT(DIRECT, STOREV, N, L, V(K-L+1, 1), LDV,
     $               TAU(K-L+1), T(K-L+1, K-L+1), LDT)
 *
 *        Compute T_{2,1}
 *        T_{2,1} = V_{2,2}
 *
         CALL ZLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV,
     $               T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = T_{2,1}*V_{1,2}'
 *
         CALL ZTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L,
     $               ONE, V(1, N-K+1), LDV, T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} 
 *        Note: We assume K <= N, and GEMM will do nothing if N=K
 *
         CALL ZGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, 
     $               V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1),
     $               LDT)

 *
 *        At this point, we have that T_{2,1} = V_2*V_1'
 *        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1}
 *        respectively.
 *
 *        T_{2,1} = -T_{2,2}*T_{2,1}
 *
         CALL ZTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L,
     $               K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, 
     $               T(K-L+1, 1), LDT)

 *
 *        T_{2,1} = T_{2,1}*T_{1,1}
 *
         CALL ZTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L,
     $               K-L, ONE, T, LDT, T(K-L+1, 1), LDT)
      END IF
      END SUBROUTINE