Add Dynamic Mode Decomposition functions (Reference-LAPACK PR 736)

2 years ago · 8d57af540b
--- a/lapack-netlib/SRC/cgedmd.f90
+++ b/lapack-netlib/SRC/cgedmd.f90
@@ -0,0 +1,995 @@
      SUBROUTINE CGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
                         RWORK, LRWORK, IWORK, LIWORK, INFO )
 !   March 2023
 !.....
      USE                   iso_fortran_env
      IMPLICIT NONE
      INTEGER, PARAMETER :: WP = real32
 !.....
 !     Scalar arguments
      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
                                 NRNK, LDZ, LDB, LDW,  LDS, &
                                 LIWORK, LRWORK, LZWORK
      INTEGER,       INTENT(OUT)  :: K, INFO
      REAL(KIND=WP), INTENT(IN)   ::    TOL
 !     Array arguments
      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
                                         W(LDW,*), S(LDS,*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
      INTEGER,          INTENT(OUT)   :: IWORK(*)
 !............................................................
 !     Purpose
 !     =======
 !     CGEDMD computes the Dynamic Mode Decomposition (DMD) for
 !     a pair of data snapshot matrices. For the input matrices
 !     X and Y such that Y = A*X with an unaccessible matrix
 !     A, CGEDMD computes a certain number of Ritz pairs of A using
 !     the standard Rayleigh-Ritz extraction from a subspace of
 !     range(X) that is determined using the leading left singular
 !     vectors of X. Optionally, CGEDMD returns the residuals
 !     of the computed Ritz pairs, the information needed for
 !     a refinement of the Ritz vectors, or the eigenvectors of
 !     the Exact DMD.
 !     For further details see the references listed
 !     below. For more details of the implementation see [3].
 !
 !     References
 !     ==========
 !     [1] P. Schmid: Dynamic mode decomposition of numerical
 !         and experimental data,
 !         Journal of Fluid Mechanics 656, 5-28, 2010.
 !     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
 !         decompositions: analysis and enhancements,
 !         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
 !     [3] Z. Drmac: A LAPACK implementation of the Dynamic
 !         Mode Decomposition I. Technical report. AIMDyn Inc.
 !         and LAPACK Working Note 298.
 !     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
 !         Brunton, N. Kutz: On Dynamic Mode Decomposition:
 !         Theory and Applications, Journal of Computational
 !         Dynamics 1(2), 391 -421, 2014.
 !
 !......................................................................
 !     Developed and supported by:
 !     ===========================
 !     Developed and coded by Zlatko Drmac, Faculty of Science,
 !     University of Zagreb;  drmac@math.hr
 !     In cooperation with
 !     AIMdyn Inc., Santa Barbara, CA.
 !     and supported by
 !     - DARPA SBIR project "Koopman Operator-Based Forecasting
 !     for Nonstationary Processes from Near-Term, Limited
 !     Observational Data" Contract No: W31P4Q-21-C-0007
 !     - DARPA PAI project "Physics-Informed Machine Learning
 !     Methodologies" Contract No: HR0011-18-9-0033
 !     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
 !     Framework for Space-Time Analysis of Process Dynamics"
 !     Contract No: HR0011-16-C-0116
 !     Any opinions, findings and conclusions or recommendations
 !     expressed in this material are those of the author and
 !     do not necessarily reflect the views of the DARPA SBIR
 !     Program Office
 !============================================================
 !     Distribution Statement A:
 !     Approved for Public Release, Distribution Unlimited.
 !     Cleared by DARPA on September 29, 2022
 !============================================================
 !......................................................................
 !     Arguments
 !     =========
 !     JOBS (input) CHARACTER*1
 !     Determines whether the initial data snapshots are scaled
 !     by a diagonal matrix.
 !     'S' :: The data snapshots matrices X and Y are multiplied
 !            with a diagonal matrix D so that X*D has unit
 !            nonzero columns (in the Euclidean 2-norm)
 !     'C' :: The snapshots are scaled as with the 'S' option.
 !            If it is found that an i-th column of X is zero
 !            vector and the corresponding i-th column of Y is
 !            non-zero, then the i-th column of Y is set to
 !            zero and a warning flag is raised.
 !     'Y' :: The data snapshots matrices X and Y are multiplied
 !            by a diagonal matrix D so that Y*D has unit
 !            nonzero columns (in the Euclidean 2-norm)
 !     'N' :: No data scaling.
 !.....
 !     JOBZ (input) CHARACTER*1
 !     Determines whether the eigenvectors (Koopman modes) will
 !     be computed.
 !     'V' :: The eigenvectors (Koopman modes) will be computed
 !            and returned in the matrix Z.
 !            See the description of Z.
 !     'F' :: The eigenvectors (Koopman modes) will be returned
 !            in factored form as the product X(:,1:K)*W, where X
 !            contains a POD basis (leading left singular vectors
 !            of the data matrix X) and W contains the eigenvectors
 !            of the corresponding Rayleigh quotient.
 !            See the descriptions of K, X, W, Z.
 !     'N' :: The eigenvectors are not computed.
 !.....
 !     JOBR (input) CHARACTER*1
 !     Determines whether to compute the residuals.
 !     'R' :: The residuals for the computed eigenpairs will be
 !            computed and stored in the array RES.
 !            See the description of RES.
 !            For this option to be legal, JOBZ must be 'V'.
 !     'N' :: The residuals are not computed.
 !.....
 !     JOBF (input) CHARACTER*1
 !     Specifies whether to store information needed for post-
 !     processing (e.g. computing refined Ritz vectors)
 !     'R' :: The matrix needed for the refinement of the Ritz
 !            vectors is computed and stored in the array B.
 !            See the description of B.
 !     'E' :: The unscaled eigenvectors of the Exact DMD are
 !            computed and returned in the array B. See the
 !            description of B.
 !     'N' :: No eigenvector refinement data is computed.
 !.....
 !     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
 !     Allows for a selection of the SVD algorithm from the
 !     LAPACK library.
 !     1 :: CGESVD (the QR SVD algorithm)
 !     2 :: CGESDD (the Divide and Conquer algorithm; if enough
 !          workspace available, this is the fastest option)
 !     3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4
 !          are the most accurate options)
 !     4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3
 !          are the most accurate options)
 !     For the four methods above, a significant difference in
 !     the accuracy of small singular values is possible if
 !     the snapshots vary in norm so that X is severely
 !     ill-conditioned. If small (smaller than EPS*||X||)
 !     singular values are of interest and JOBS=='N',  then
 !     the options (3, 4) give the most accurate results, where
 !     the option 4 is slightly better and with stronger
 !     theoretical background.
 !     If JOBS=='S', i.e. the columns of X will be normalized,
 !     then all methods give nearly equally accurate results.
 !.....
 !     M (input) INTEGER, M>= 0
 !     The state space dimension (the row dimension of X, Y).
 !.....
 !     N (input) INTEGER, 0 <= N <= M
 !     The number of data snapshot pairs
 !     (the number of columns of X and Y).
 !.....
 !     X (input/output) COMPLEX(KIND=WP) M-by-N array
 !   > On entry, X contains the data snapshot matrix X. It is
 !     assumed that the column norms of X are in the range of
 !     the normalized floating point numbers.
 !   < On exit, the leading K columns of X contain a POD basis,
 !     i.e. the leading K left singular vectors of the input
 !     data matrix X, U(:,1:K). All N columns of X contain all
 !     left singular vectors of the input matrix X.
 !     See the descriptions of K, Z and W.
 !.....
 !     LDX (input) INTEGER, LDX >= M
 !     The leading dimension of the array X.
 !.....
 !     Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array
 !   > On entry, Y contains the data snapshot matrix Y
 !   < On exit,
 !     If JOBR == 'R', the leading K columns of Y  contain
 !     the residual vectors for the computed Ritz pairs.
 !     See the description of RES.
 !     If JOBR == 'N', Y contains the original input data,
 !                     scaled according to the value of JOBS.
 !.....
 !     LDY (input) INTEGER , LDY >= M
 !     The leading dimension of the array Y.
 !.....
 !     NRNK (input) INTEGER
 !     Determines the mode how to compute the numerical rank,
 !     i.e. how to truncate small singular values of the input
 !     matrix X. On input, if
 !     NRNK = -1 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(1)
 !                  This option is recommended.
 !     NRNK = -2 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(i-1)
 !                  This option is included for R&D purposes.
 !                  It requires highly accurate SVD, which
 !                  may not be feasible.
 !     The numerical rank can be enforced by using positive
 !     value of NRNK as follows:
 !     0 < NRNK <= N :: at most NRNK largest singular values
 !     will be used. If the number of the computed nonzero
 !     singular values is less than NRNK, then only those
 !     nonzero values will be used and the actually used
 !     dimension is less than NRNK. The actual number of
 !     the nonzero singular values is returned in the variable
 !     K. See the descriptions of TOL and  K.
 !.....
 !     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
 !     The tolerance for truncating small singular values.
 !     See the description of NRNK.
 !.....
 !     K (output) INTEGER,  0 <= K <= N
 !     The dimension of the POD basis for the data snapshot
 !     matrix X and the number of the computed Ritz pairs.
 !     The value of K is determined according to the rule set
 !     by the parameters NRNK and TOL.
 !     See the descriptions of NRNK and TOL.
 !.....
 !     EIGS (output) COMPLEX(KIND=WP) N-by-1 array
 !     The leading K (K<=N) entries of EIGS contain
 !     the computed eigenvalues (Ritz values).
 !     See the descriptions of K, and Z.
 !.....
 !     Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array
 !     If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i)
 !     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
 !     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
 !     the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i)
 !     is an eigenvector corresponding to EIGS(i). The columns
 !     of W(1:k,1:K) are the computed eigenvectors of the
 !     K-by-K Rayleigh quotient.
 !     See the descriptions of EIGS, X and W.
 !.....
 !     LDZ (input) INTEGER , LDZ >= M
 !     The leading dimension of the array Z.
 !.....
 !     RES (output) REAL(KIND=WP) N-by-1 array
 !     RES(1:K) contains the residuals for the K computed
 !     Ritz pairs,
 !     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
 !     See the description of EIGS and Z.
 !.....
 !     B (output) COMPLEX(KIND=WP)  M-by-N array.
 !     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
 !     be used for computing the refined vectors; see further
 !     details in the provided references.
 !     If JOBF == 'E', B(1:M,1:K) contains
 !     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
 !     Exact DMD, up to scaling by the inverse eigenvalues.
 !     If JOBF =='N', then B is not referenced.
 !     See the descriptions of X, W, K.
 !.....
 !     LDB (input) INTEGER, LDB >= M
 !     The leading dimension of the array B.
 !.....
 !     W (workspace/output) COMPLEX(KIND=WP) N-by-N array
 !     On exit, W(1:K,1:K) contains the K computed
 !     eigenvectors of the matrix Rayleigh quotient.
 !     The Ritz vectors (returned in Z) are the
 !     product of X (containing a POD basis for the input
 !     matrix X) and W. See the descriptions of K, S, X and Z.
 !     W is also used as a workspace to temporarily store the
 !     right singular vectors of X.
 !.....
 !     LDW (input) INTEGER, LDW >= N
 !     The leading dimension of the array W.
 !.....
 !     S (workspace/output) COMPLEX(KIND=WP) N-by-N array
 !     The array S(1:K,1:K) is used for the matrix Rayleigh
 !     quotient. This content is overwritten during
 !     the eigenvalue decomposition by CGEEV.
 !     See the description of K.
 !.....
 !     LDS (input) INTEGER, LDS >= N
 !     The leading dimension of the array S.
 !.....
 !     ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
 !     ZWORK is used as complex workspace in the complex SVD, as
 !     specified by WHTSVD (1,2, 3 or 4) and for CGEEV for computing
 !     the eigenvalues of a Rayleigh quotient.
 !     If the call to CGEDMD is only workspace query, then
 !     ZWORK(1) contains the minimal complex workspace length and
 !     ZWORK(2) is the optimal complex workspace length.
 !     Hence, the length of work is at least 2.
 !     See the description of LZWORK.
 !.....
 !     LZWORK (input) INTEGER
 !     The minimal length of the workspace vector ZWORK.
 !     LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_CGEEV),
 !     where LZWORK_CGEEV = MAX( 1, 2*N )  and the minimal
 !     LZWORK_SVD is calculated as follows
 !     If WHTSVD == 1 :: CGESVD ::
 !        LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N))
 !     If WHTSVD == 2 :: CGESDD ::
 !        LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
 !     If WHTSVD == 3 :: CGESVDQ ::
 !        LZWORK_SVD = obtainable by a query
 !     If WHTSVD == 4 :: CGEJSV ::
 !        LZWORK_SVD = obtainable by a query
 !     If on entry LZWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths and returns them in
 !     LZWORK(1) and LZWORK(2), respectively.
 !.....
 !     RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array
 !     On exit, RWORK(1:N) contains the singular values of
 !     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
 !     If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain
 !     scaling factor RWORK(N+2)/RWORK(N+1) used to scale X
 !     and Y to avoid overflow in the SVD of X.
 !     This may be of interest if the scaling option is off
 !     and as many as possible smallest eigenvalues are
 !     desired to the highest feasible accuracy.
 !     If the call to CGEDMD is only workspace query, then
 !     RWORK(1) contains the minimal workspace length.
 !     See the description of LRWORK.
 !.....
 !     LRWORK (input) INTEGER
 !     The minimal length of the workspace vector RWORK.
 !     LRWORK is calculated as follows:
 !     LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_CGEEV), where
 !     LRWORK_CGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace
 !     for the SVD subroutine determined by the input parameter
 !     WHTSVD.
 !     If WHTSVD == 1 :: CGESVD ::
 !        LRWORK_SVD = 5*MIN(M,N)
 !     If WHTSVD == 2 :: CGESDD ::
 !        LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N),
 !        2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
 !     If WHTSVD == 3 :: CGESVDQ ::
 !        LRWORK_SVD = obtainable by a query
 !     If WHTSVD == 4 :: CGEJSV ::
 !        LRWORK_SVD = obtainable by a query
 !     If on entry LRWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     real workspace length and returns it in RWORK(1).
 !.....
 !     IWORK (workspace/output) INTEGER LIWORK-by-1 array
 !     Workspace that is required only if WHTSVD equals
 !     2 , 3 or 4. (See the description of WHTSVD).
 !     If on entry LWORK =-1 or LIWORK=-1, then the
 !     minimal length of IWORK is computed and returned in
 !     IWORK(1). See the description of LIWORK.
 !.....
 !     LIWORK (input) INTEGER
 !     The minimal length of the workspace vector IWORK.
 !     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
 !     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
 !     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
 !     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
 !     If on entry LIWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths for  ZWORK, RWORK and
 !     IWORK. See the descriptions of ZWORK, RWORK and IWORK.
 !.....
 !     INFO (output) INTEGER
 !     -i < 0 :: On entry, the i-th argument had an
 !               illegal value
 !        = 0 :: Successful return.
 !        = 1 :: Void input. Quick exit (M=0 or N=0).
 !        = 2 :: The SVD computation of X did not converge.
 !               Suggestion: Check the input data and/or
 !               repeat with different WHTSVD.
 !        = 3 :: The computation of the eigenvalues did not
 !               converge.
 !        = 4 :: If data scaling was requested on input and
 !               the procedure found inconsistency in the data
 !               such that for some column index i,
 !               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
 !               to zero if JOBS=='C'. The computation proceeds
 !               with original or modified data and warning
 !               flag is set with INFO=4.
 !.............................................................
 !.............................................................
 !     Parameters
 !     ~~~~~~~~~~
      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )

 !     Local scalars
 !     ~~~~~~~~~~~~~
      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,   &
                       SSUM,  XSCL1,  XSCL2
      INTEGER       ::  i,  j, IMINWR,  INFO1, INFO2,   &
                        LWRKEV, LWRSDD, LWRSVD, LWRSVJ, &
                       LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
                       MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
                       OLWORK, MLRWRK
      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY, &
                        WNTEX, WNTREF, WNTRES, WNTVEC
      CHARACTER     ::  JOBZL, T_OR_N
      CHARACTER     ::  JSVOPT
 !
 !     Local arrays
 !     ~~~~~~~~~~~~
      REAL(KIND=WP) :: RDUMMY(2)

 !     External functions (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~
      REAL(KIND=WP) CLANGE, SLAMCH, SCNRM2
      EXTERNAL      CLANGE, SLAMCH, SCNRM2, ICAMAX
      INTEGER                               ICAMAX
      LOGICAL       SISNAN, LSAME
      EXTERNAL      SISNAN, LSAME

 !     External subroutines (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~~~~
      EXTERNAL      CAXPY,  CGEMM,  CSSCAL
      EXTERNAL      CGEEV,  CGEJSV, CGESDD, CGESVD, CGESVDQ, &
                    CLACPY, CLASCL, CLASSQ, XERBLA

 !     Intrinsic functions
 !     ~~~~~~~~~~~~~~~~~~~
      INTRINSIC     FLOAT, INT, MAX, SQRT
 !............................................................
 !
 !    Test the input arguments
 !
      WNTRES = LSAME(JOBR,'R')
      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
      SCCOLY = LSAME(JOBS,'Y')
      WNTVEC = LSAME(JOBZ,'V')
      WNTREF = LSAME(JOBF,'R')
      WNTEX  = LSAME(JOBF,'E')
      INFO   = 0
      LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) &
                                  .OR. ( LRWORK == -1 ) )
 !
      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
                                  LSAME(JOBS,'N')) )   THEN
          INFO = -1
      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
                              .OR. LSAME(JOBZ,'F')) )  THEN
          INFO = -2
      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
          INFO = -3
      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
                LSAME(JOBF,'N') ) )                    THEN
          INFO = -4
      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
          INFO = -5
      ELSE IF ( M < 0 )   THEN
          INFO = -6
      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
          INFO = -7
      ELSE IF ( LDX < M ) THEN
          INFO = -9
      ELSE IF ( LDY < M ) THEN
          INFO = -11
      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
          INFO = -12
      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
          INFO = -13
      ELSE IF ( LDZ < M ) THEN
          INFO = -17
      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
          INFO = -20
      ELSE IF ( LDW < N ) THEN
          INFO = -22
      ELSE IF ( LDS < N ) THEN
          INFO = -24
      END IF
 !
      IF ( INFO == 0 ) THEN
          ! Compute the minimal and the optimal workspace
          ! requirements. Simulate running the code and
          ! determine minimal and optimal sizes of the
          ! workspace at any moment of the run.
         IF ( N == 0 ) THEN
             ! Quick return. All output except K is void.
             ! INFO=1 signals the void input.
             ! In case of a workspace query, the default
             ! minimal workspace lengths are returned.
            IF ( LQUERY ) THEN
                IWORK(1) = 1
                RWORK(1) = 1
                ZWORK(1) = 2
                ZWORK(2) = 2
            ELSE
               K   =  0
            END IF
            INFO = 1
            RETURN
         END IF

         IMINWR = 1
         MLRWRK = MAX(1,N)
         MLWORK = 2
         OLWORK = 2
         SELECT CASE ( WHTSVD )
         CASE (1)
             ! The following is specified as the minimal
             ! length of WORK in the definition of CGESVD:
             ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
             MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
             MLWORK = MAX(MLWORK,MWRSVD)
             MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N))
             IF ( LQUERY ) THEN
                CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, &
                     B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 )
                LWRSVD = INT( ZWORK(1) )
                OLWORK = MAX(OLWORK,LWRSVD)
             END IF
         CASE (2)
             ! The following is specified as the minimal
             ! length of WORK in the definition of CGESDD:
             ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
             ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N)
             ! In LAPACK 3.10.1 RWORK is defined differently.
             ! Below we take max over the two versions.
             ! IMINWR = 8*MIN(M,N)
             MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
             MLWORK = MAX(MLWORK,MWRSDD)
             IMINWR = 8*MIN(M,N)
             MLRWRK = MAX( MLRWRK,  N +                    &
                      MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), &
                           5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), &
                           2*MAX(M,N)*MIN(M,N)+            &
                           2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
             IF ( LQUERY ) THEN
                CALL CGESDD( 'O', M, N, X, LDX, RWORK, B,     &
                     LDB, W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 )
                LWRSDD = MAX(MWRSDD,INT( ZWORK(1) ))
                OLWORK = MAX(OLWORK,LWRSDD)
             END IF
         CASE (3)
             CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
                  X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK,  &
                  IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 )
             IMINWR = IWORK(1)
             MWRSVQ = INT(ZWORK(2))
             MLWORK = MAX(MLWORK,MWRSVQ)
             MLRWRK = MAX(MLRWRK,N + INT(RDUMMY(1)))
             IF ( LQUERY ) THEN
                LWRSVQ = INT(ZWORK(1))
                OLWORK = MAX(OLWORK,LWRSVQ)
             END IF
         CASE (4)
             JSVOPT = 'J'
             CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
                   N, X, LDX, RWORK, Z, LDZ, W, LDW,       &
                   ZWORK, -1, RDUMMY, -1, IWORK, INFO1 )
             IMINWR = IWORK(1)
             MWRSVJ = INT(ZWORK(2))
             MLWORK = MAX(MLWORK,MWRSVJ)
             MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1))))
             IF ( LQUERY ) THEN
                LWRSVJ = INT(ZWORK(1))
                OLWORK = MAX(OLWORK,LWRSVJ)
             END IF
         END SELECT
         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
             JOBZL = 'V'
         ELSE
             JOBZL = 'N'
         END IF
         ! Workspace calculation to the CGEEV call
         MWRKEV = MAX( 1, 2*N )
         MLWORK = MAX(MLWORK,MWRKEV)
         MLRWRK = MAX(MLRWRK,N+2*N)
         IF ( LQUERY ) THEN
             CALL CGEEV( 'N', JOBZL, N, S, LDS, EIGS, &
              W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 ) ! LAPACK CALL
                LWRKEV = INT(ZWORK(1))
                OLWORK = MAX( OLWORK, LWRKEV )
                OLWORK = MAX( 2, OLWORK )
         END IF
 !
         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30
         IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28
         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26

      END IF
 !
      IF( INFO /= 0 ) THEN
         CALL XERBLA( 'CGEDMD', -INFO )
         RETURN
      ELSE IF ( LQUERY ) THEN
 !     Return minimal and optimal workspace sizes
          IWORK(1) = IMINWR
          RWORK(1) = MLRWRK
          ZWORK(1) = MLWORK
          ZWORK(2) = OLWORK
          RETURN
      END IF
 !............................................................
 !
      OFL   = SLAMCH('O')*SLAMCH('P')
      SMALL = SLAMCH('S')
      BADXY = .FALSE.
 !
 !     <1> Optional scaling of the snapshots (columns of X, Y)
 !     ==========================================================
      IF ( SCCOLX ) THEN
          ! The columns of X will be normalized.
          ! To prevent overflows, the column norms of X are
          ! carefully computed using CLASSQ.
          K = 0
          DO i = 1, N
            !WORK(i) = SCNRM2( M, X(1,i), 1 )
            SCALE  = ZERO
            CALL CLASSQ( M, X(1,i), 1, SCALE, SSUM )
            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
                K    =  0
                INFO = -8
                CALL XERBLA('CGEDMD',-INFO)
            END IF
            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
               ROOTSC = SQRT(SSUM)
               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
 !                 Norm of X(:,i) overflows. First, X(:,i)
 !                 is scaled by
 !                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
 !                 Next, the norm of X(:,i) is stored without
 !                 overflow as WORK(i) = - SCALE * (ROOTSC/M),
 !                 the minus sign indicating the 1/M factor.
 !                 Scaling is performed without overflow, and
 !                 underflow may occur in the smallest entries
 !                 of X(:,i). The relative backward and forward
 !                 errors are small in the ell_2 norm.
                  CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
                               M, 1, X(1,i), LDX, INFO2 )
                  RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
               ELSE
 !                 X(:,i) will be scaled to unit 2-norm
                  RWORK(i) =   SCALE * ROOTSC
                  CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
                               X(1,i), LDX, INFO2 )             ! LAPACK CALL
 !                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)          ! INTRINSIC
               END IF
            ELSE
               RWORK(i) = ZERO
               K = K + 1
            END IF
          END DO
          IF ( K == N ) THEN
          ! All columns of X are zero. Return error code -8.
          ! (the 8th input variable had an illegal value)
          K = 0
          INFO = -8
          CALL XERBLA('CGEDMD',-INFO)
          RETURN
          END IF
          DO i = 1, N
 !           Now, apply the same scaling to the columns of Y.
            IF ( RWORK(i) >  ZERO ) THEN
                CALL CSSCAL( M, ONE/RWORK(i), Y(1,i), 1 ) ! BLAS CALL
 !               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)      ! INTRINSIC
            ELSE IF ( RWORK(i) < ZERO ) THEN
                CALL CLASCL( 'G', 0, 0, -RWORK(i),          &
                     ONE/FLOAT(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL
            ELSE IF ( ABS(Y(ICAMAX(M, Y(1,i),1),i ))  &
                                            /= ZERO ) THEN
 !               X(:,i) is zero vector. For consistency,
 !               Y(:,i) should also be zero. If Y(:,i) is not
 !               zero, then the data might be inconsistent or
 !               corrupted. If JOBS == 'C', Y(:,i) is set to
 !               zero and a warning flag is raised.
 !               The computation continues but the
 !               situation will be reported in the output.
                BADXY = .TRUE.
                IF ( LSAME(JOBS,'C')) &
                CALL CSSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
            END IF
          END DO
      END IF
  !
      IF ( SCCOLY ) THEN
          ! The columns of Y will be normalized.
          ! To prevent overflows, the column norms of Y are
          ! carefully computed using CLASSQ.
          DO i = 1, N
            !RWORK(i) = SCNRM2( M, Y(1,i), 1 )
            SCALE  = ZERO
            CALL CLASSQ( M, Y(1,i), 1, SCALE, SSUM )
            IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN
                K    =  0
                INFO = -10
                CALL XERBLA('CGEDMD',-INFO)
            END IF
            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
               ROOTSC = SQRT(SSUM)
               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
 !                 Norm of Y(:,i) overflows. First, Y(:,i)
 !                 is scaled by
 !                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
 !                 Next, the norm of Y(:,i) is stored without
 !                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
 !                 the minus sign indicating the 1/M factor.
 !                 Scaling is performed without overflow, and
 !                 underflow may occur in the smallest entries
 !                 of Y(:,i). The relative backward and forward
 !                 errors are small in the ell_2 norm.
                  CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
                               M, 1, Y(1,i), LDY, INFO2 )
                  RWORK(i) = - SCALE * ( ROOTSC / FLOAT(M) )
               ELSE
 !                 Y(:,i) will be scaled to unit 2-norm
                  RWORK(i) =   SCALE * ROOTSC
                  CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
                               Y(1,i), LDY, INFO2 )              ! LAPACK CALL
 !                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC
               END IF
            ELSE
               RWORK(i) = ZERO
            END IF
         END DO
         DO i = 1, N
 !           Now, apply the same scaling to the columns of X.
            IF ( RWORK(i) >  ZERO ) THEN
                CALL CSSCAL( M, ONE/RWORK(i), X(1,i), 1 )  ! BLAS CALL
 !               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC
            ELSE IF ( RWORK(i) < ZERO ) THEN
                CALL CLASCL( 'G', 0, 0, -RWORK(i),          &
                     ONE/FLOAT(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL
            ELSE IF ( ABS(X(ICAMAX(M, X(1,i),1),i ))  &
                                           /= ZERO ) THEN
 !               Y(:,i) is zero vector.  If X(:,i) is not
 !               zero, then a warning flag is raised.
 !               The computation continues but the
 !               situation will be reported in the output.
                BADXY = .TRUE.
            END IF
         END DO
       END IF
 !
 !     <2> SVD of the data snapshot matrix X.
 !     =====================================
 !     The left singular vectors are stored in the array X.
 !     The right singular vectors are in the array W.
 !     The array W will later on contain the eigenvectors
 !     of a Rayleigh quotient.
      NUMRNK = N
      SELECT CASE ( WHTSVD )
         CASE (1)
             CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, &
                  LDB, W, LDW, ZWORK, LZWORK,  RWORK(N+1), INFO1 ) ! LAPACK CALL
             T_OR_N = 'C'
         CASE (2)
            CALL CGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, &
                 LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 )   ! LAPACK CALL
            T_OR_N = 'C'
         CASE (3)
              CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
                   X, LDX, RWORK, Z, LDZ, W, LDW, &
                   NUMRNK, IWORK, LIWORK, ZWORK,     &
                   LZWORK, RWORK(N+1), LRWORK-N, INFO1)     ! LAPACK CALL
              CALL CLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
         T_OR_N = 'C'
         CASE (4)
              CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, &
                   N, X, LDX, RWORK, Z, LDZ, W, LDW, &
                   ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 )    ! LAPACK CALL
              CALL CLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
              T_OR_N = 'N'
              XSCL1 = RWORK(N+1)
              XSCL2 = RWORK(N+2)
              IF ( XSCL1 /=  XSCL2 ) THEN
                 ! This is an exceptional situation. If the
                 ! data matrices are not scaled and the
                 ! largest singular value of X overflows.
                 ! In that case CGEJSV can return the SVD
                 ! in scaled form. The scaling factor can be used
                 ! to rescale the data (X and Y).
                 CALL CLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
              END IF
      END SELECT
 !
      IF ( INFO1 > 0 ) THEN
         ! The SVD selected subroutine did not converge.
         ! Return with an error code.
         INFO = 2
         RETURN
      END IF
 !
      IF ( RWORK(1) == ZERO ) THEN
          ! The largest computed singular value of (scaled)
          ! X is zero. Return error code -8
          ! (the 8th input variable had an illegal value).
          K = 0
          INFO = -8
          CALL XERBLA('CGEDMD',-INFO)
          RETURN
      END IF
 !
      !<3> Determine the numerical rank of the data
      !    snapshots matrix X. This depends on the
      !    parameters NRNK and TOL.

      SELECT CASE ( NRNK )
          CASE ( -1 )
               K = 1
               DO i = 2, NUMRNK
                 IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. &
                      ( RWORK(i) <= SMALL ) ) EXIT
                 K = K + 1
               END DO
          CASE ( -2 )
               K = 1
               DO i = 1, NUMRNK-1
                 IF ( ( RWORK(i+1) <= RWORK(i)*TOL  ) .OR. &
                      ( RWORK(i) <= SMALL ) ) EXIT
                 K = K + 1
               END DO
          CASE DEFAULT
               K = 1
               DO i = 2, NRNK
                  IF ( RWORK(i) <= SMALL ) EXIT
                  K = K + 1
               END DO
          END SELECT
      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
      !   snapshot data in the input matrix X.

      !<4> Compute the Rayleigh quotient S = U^H * A * U.
      !    Depending on the requested outputs, the computation
      !    is organized to compute additional auxiliary
      !    matrices (for the residuals and refinements).
      !
      !    In all formulas below, we need V_k*Sigma_k^(-1)
      !    where either V_k is in W(1:N,1:K), or V_k^H is in
      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
      IF ( LSAME(T_OR_N, 'N') ) THEN
          DO i = 1, K
           CALL CSSCAL( N, ONE/RWORK(i), W(1,i), 1 )   ! BLAS CALL
           ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC
          END DO
      ELSE
          ! This non-unit stride access is due to the fact
          ! that CGESVD, CGESVDQ and CGESDD return the
          ! adjoint matrix of the right singular vectors.
          !DO i = 1, K
          ! CALL DSCAL( N, ONE/RWORK(i), W(i,1), LDW )  ! BLAS CALL
          ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC
          !END DO
          DO i = 1, K
              RWORK(N+i) = ONE/RWORK(i)
          END DO
          DO j = 1, N
             DO i = 1, K
                 W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j)
             END DO
          END DO
      END IF
 !
      IF ( WNTREF ) THEN
         !
         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
         ! for computing the refined Ritz vectors
         ! (optionally, outside CGEDMD).
          CALL CGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, &
                      LDW, ZZERO, Z, LDZ )                       ! BLAS CALL
          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N)))  ! INTRINSIC, for T_OR_N=='T'
          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))             ! INTRINSIC, for T_OR_N=='N'
          !
          ! At this point Z contains
          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
          ! this is needed for computing the residuals.
          ! This matrix is  returned in the array B and
          ! it can be used to compute refined Ritz vectors.
          CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC

          CALL CGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, &
                      LDZ, ZZERO, S, LDS )                       ! BLAS CALL
          ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC
          ! At this point S = U^H * A * U is the Rayleigh quotient.
      ELSE
        ! A * U(:,1:K) is not explicitly needed and the
        ! computation is organized differently. The Rayleigh
        ! quotient is computed more efficiently.
        CALL CGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, &
                   ZZERO, Z, LDZ )                                  ! BLAS CALL
        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) )  ! INTRINSIC
        !
        CALL CGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, &
                    LDW, ZZERO, S, LDS )                        ! BLAS CALL
        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T'
        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))          ! INTRINSIC, for T_OR_N=='N'
        ! At this point S = U^H * A * U is the Rayleigh quotient.
        ! If the residuals are requested, save scaled V_k into Z.
        ! Recall that V_k or V_k^H is stored in W.
        IF ( WNTRES .OR. WNTEX ) THEN
          IF ( LSAME(T_OR_N, 'N') ) THEN
              CALL CLACPY( 'A', N, K, W, LDW, Z, LDZ )
          ELSE
              CALL CLACPY( 'A', K, N, W, LDW, Z, LDZ )
          END IF
        END IF
      END IF
 !
      !<5> Compute the Ritz values and (if requested) the
      !   right eigenvectors of the Rayleigh quotient.
      !
      CALL CGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, &
           LDW, W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 )  ! LAPACK CALL
      !
      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
      ! quotient.  See the description of Z.
      ! Also, see the description of CGEEV.
      IF ( INFO1 > 0 ) THEN
         ! CGEEV failed to compute the eigenvalues and
         ! eigenvectors of the Rayleigh quotient.
         INFO = 3
         RETURN
      END IF
 !
      ! <6> Compute the eigenvectors (if requested) and,
      ! the residuals (if requested).
      !
      IF ( WNTVEC .OR. WNTEX ) THEN
          IF ( WNTRES ) THEN
              IF ( WNTREF ) THEN
                ! Here, if the refinement is requested, we have
                ! A*U(:,1:K) already computed and stored in Z.
                ! For the residuals, need Y = A * U(:,1;K) * W.
                CALL CGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, &
                           LDW, ZZERO, Y, LDY )              ! BLAS CALL
                ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)       ! INTRINSIC
                ! This frees Z; Y contains A * U(:,1:K) * W.
              ELSE
                ! Compute S = V_k * Sigma_k^(-1) * W, where
                ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z
                CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
                           W, LDW, ZZERO, S, LDS)
                ! Then, compute Z = Y * S =
                ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
                ! = A * U(:,1:K) * W(1:K,1:K)
                CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
                           LDS, ZZERO, Z, LDZ)
                ! Save a copy of Z into Y and free Z for holding
                ! the Ritz vectors.
                CALL CLACPY( 'A', M, K, Z, LDZ, Y, LDY )
                IF ( WNTEX ) CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )
              END IF
          ELSE IF ( WNTEX ) THEN
              ! Compute S = V_k * Sigma_k^(-1) * W, where
                ! V_k * Sigma_k^(-1) is stored in Z
                CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
                           W, LDW, ZZERO, S, LDS)
                ! Then, compute Z = Y * S =
                ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
                ! = A * U(:,1:K) * W(1:K,1:K)
                CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
                           LDS, ZZERO, B, LDB)
                ! The above call replaces the following two calls
                ! that were used in the developing-testing phase.
                ! CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
                !           LDS, ZZERO, Z, LDZ)
                ! Save a copy of Z into Y and free Z for holding
                ! the Ritz vectors.
                ! CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB )
          END IF
 !
          ! Compute the Ritz vectors
          IF ( WNTVEC ) CALL CGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, &
                       ZZERO, Z, LDZ )                          ! BLAS CALL
          ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
 !
          IF ( WNTRES ) THEN
             DO i = 1, K
                CALL CAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
                ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTRINSIC
                RES(i) = SCNRM2( M, Y(1,i), 1)                        ! BLAS CALL
             END DO
          END IF
      END IF
 !
      IF ( WHTSVD == 4 ) THEN
          RWORK(N+1) = XSCL1
          RWORK(N+2) = XSCL2
      END IF
 !
 !     Successful exit.
      IF ( .NOT. BADXY ) THEN
         INFO = 0
      ELSE
         ! A warning on possible data inconsistency.
         ! This should be a rare event.
         INFO = 4
      END IF
 !............................................................
      RETURN
 !     ......
      END SUBROUTINE CGEDMD

--- a/lapack-netlib/SRC/cgedmdq.f90
+++ b/lapack-netlib/SRC/cgedmdq.f90
@@ -0,0 +1,689 @@
 SUBROUTINE CGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
                    LDY,   NRNK,  TOL,   K,  EIGS,         &
                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
                    S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
                    IWORK, LIWORK, INFO )
 ! March 2023
 !.....
      USE                   iso_fortran_env
      IMPLICIT NONE 
      INTEGER, PARAMETER :: WP = real32
 !.....      
 !     Scalar arguments       
      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
                                JOBT, JOBF
      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
                                LDY, NRNK, LDZ, LDB, LDV,  &
                                LDS, LZWORK,  LWORK, LIWORK
      INTEGER,   INTENT(OUT) :: INFO,   K      
      REAL(KIND=WP), INTENT(IN)    ::   TOL     
 !     Array arguments      
      COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
                                         Z(LDZ,*), B(LDB,*), &
                                         V(LDV,*), S(LDS,*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
      REAL(KIND=WP), INTENT(OUT)   :: RES(*)
      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
      INTEGER,       INTENT(OUT)   :: IWORK(*)
 !.....      
 !     Purpose  
 !     =======
 !     CGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
 !     a pair of data snapshot matrices, using a QR factorization
 !     based compression of the data. For the input matrices
 !     X and Y such that Y = A*X with an unaccessible matrix
 !     A, CGEDMDQ computes a certain number of Ritz pairs of A using
 !     the standard Rayleigh-Ritz extraction from a subspace of
 !     range(X) that is determined using the leading left singular 
 !     vectors of X. Optionally, CGEDMDQ returns the residuals 
 !     of the computed Ritz pairs, the information needed for
 !     a refinement of the Ritz vectors, or the eigenvectors of
 !     the Exact DMD.
 !     For further details see the references listed
 !     below. For more details of the implementation see [3].      
 !
 !     References
 !     ==========
 !     [1] P. Schmid: Dynamic mode decomposition of numerical
 !         and experimental data,
 !         Journal of Fluid Mechanics 656, 5-28, 2010.
 !     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
 !         decompositions: analysis and enhancements,
 !         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
 !     [3] Z. Drmac: A LAPACK implementation of the Dynamic
 !         Mode Decomposition I. Technical report. AIMDyn Inc.
 !         and LAPACK Working Note 298.      
 !     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
 !         Brunton, N. Kutz: On Dynamic Mode Decomposition:
 !         Theory and Applications, Journal of Computational
 !         Dynamics 1(2), 391 -421, 2014.
 !
 !     Developed and supported by:
 !     ===========================
 !     Developed and coded by Zlatko Drmac, Faculty of Science,
 !     University of Zagreb;  drmac@math.hr
 !     In cooperation with
 !     AIMdyn Inc., Santa Barbara, CA.
 !     and supported by
 !     - DARPA SBIR project "Koopman Operator-Based Forecasting
 !     for Nonstationary Processes from Near-Term, Limited
 !     Observational Data" Contract No: W31P4Q-21-C-0007
 !     - DARPA PAI project "Physics-Informed Machine Learning
 !     Methodologies" Contract No: HR0011-18-9-0033
 !     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
 !     Framework for Space-Time Analysis of Process Dynamics"
 !     Contract No: HR0011-16-C-0116
 !     Any opinions, findings and conclusions or recommendations 
 !     expressed in this material are those of the author and 
 !     do not necessarily reflect the views of the DARPA SBIR 
 !     Program Office.      
 !============================================================
 !     Distribution Statement A: 
 !     Approved for Public Release, Distribution Unlimited.
 !     Cleared by DARPA on September 29, 2022      
 !============================================================      
 !......................................................................      
 !     Arguments
 !     =========
 !     JOBS (input) CHARACTER*1
 !     Determines whether the initial data snapshots are scaled
 !     by a diagonal matrix. The data snapshots are the columns
 !     of F. The leading N-1 columns of F are denoted X and the
 !     trailing N-1 columns are denoted Y. 
 !     'S' :: The data snapshots matrices X and Y are multiplied
 !            with a diagonal matrix D so that X*D has unit
 !            nonzero columns (in the Euclidean 2-norm)
 !     'C' :: The snapshots are scaled as with the 'S' option.
 !            If it is found that an i-th column of X is zero
 !            vector and the corresponding i-th column of Y is
 !            non-zero, then the i-th column of Y is set to
 !            zero and a warning flag is raised.
 !     'Y' :: The data snapshots matrices X and Y are multiplied
 !            by a diagonal matrix D so that Y*D has unit
 !            nonzero columns (in the Euclidean 2-norm)    
 !     'N' :: No data scaling.   
 !.....
 !     JOBZ (input) CHARACTER*1
 !     Determines whether the eigenvectors (Koopman modes) will
 !     be computed.
 !     'V' :: The eigenvectors (Koopman modes) will be computed
 !            and returned in the matrix Z.
 !            See the description of Z.
 !     'F' :: The eigenvectors (Koopman modes) will be returned
 !            in factored form as the product Z*V, where Z
 !            is orthonormal and V contains the eigenvectors
 !            of the corresponding Rayleigh quotient.
 !            See the descriptions of F, V, Z.
 !     'Q' :: The eigenvectors (Koopman modes) will be returned
 !            in factored form as the product Q*Z, where Z
 !            contains the eigenvectors of the compression of the
 !            underlying discretised operator onto the span of
 !            the data snapshots. See the descriptions of F, V, Z.   
 !            Q is from the inital QR facorization.    
 !     'N' :: The eigenvectors are not computed.  
 !.....      
 !     JOBR (input) CHARACTER*1 
 !     Determines whether to compute the residuals.
 !     'R' :: The residuals for the computed eigenpairs will
 !            be computed and stored in the array RES.
 !            See the description of RES.
 !            For this option to be legal, JOBZ must be 'V'.
 !     'N' :: The residuals are not computed.
 !.....
 !     JOBQ (input) CHARACTER*1 
 !     Specifies whether to explicitly compute and return the
 !     unitary matrix from the QR factorization.
 !     'Q' :: The matrix Q of the QR factorization of the data
 !            snapshot matrix is computed and stored in the
 !            array F. See the description of F.       
 !     'N' :: The matrix Q is not explicitly computed.
 !.....
 !     JOBT (input) CHARACTER*1 
 !     Specifies whether to return the upper triangular factor
 !     from the QR factorization.
 !     'R' :: The matrix R of the QR factorization of the data 
 !            snapshot matrix F is returned in the array Y.
 !            See the description of Y and Further details.       
 !     'N' :: The matrix R is not returned. 
 !.....
 !     JOBF (input) CHARACTER*1
 !     Specifies whether to store information needed for post-
 !     processing (e.g. computing refined Ritz vectors)
 !     'R' :: The matrix needed for the refinement of the Ritz
 !            vectors is computed and stored in the array B.
 !            See the description of B.
 !     'E' :: The unscaled eigenvectors of the Exact DMD are 
 !            computed and returned in the array B. See the
 !            description of B.
 !     'N' :: No eigenvector refinement data is computed.   
 !     To be useful on exit, this option needs JOBQ='Q'.    
 !.....
 !     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
 !     Allows for a selection of the SVD algorithm from the
 !     LAPACK library.
 !     1 :: CGESVD (the QR SVD algorithm)
 !     2 :: CGESDD (the Divide and Conquer algorithm; if enough
 !          workspace available, this is the fastest option)
 !     3 :: CGESVDQ (the preconditioned QR SVD  ; this and 4
 !          are the most accurate options)
 !     4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3
 !          are the most accurate options)
 !     For the four methods above, a significant difference in
 !     the accuracy of small singular values is possible if
 !     the snapshots vary in norm so that X is severely
 !     ill-conditioned. If small (smaller than EPS*||X||)
 !     singular values are of interest and JOBS=='N',  then
 !     the options (3, 4) give the most accurate results, where
 !     the option 4 is slightly better and with stronger 
 !     theoretical background.
 !     If JOBS=='S', i.e. the columns of X will be normalized,
 !     then all methods give nearly equally accurate results.
 !.....
 !     M (input) INTEGER, M >= 0 
 !     The state space dimension (the number of rows of F).
 !.....      
 !     N (input) INTEGER, 0 <= N <= M
 !     The number of data snapshots from a single trajectory,
 !     taken at equidistant discrete times. This is the 
 !     number of columns of F.
 !.....
 !     F (input/output) COMPLEX(KIND=WP) M-by-N array
 !     > On entry,
 !     the columns of F are the sequence of data snapshots 
 !     from a single trajectory, taken at equidistant discrete
 !     times. It is assumed that the column norms of F are 
 !     in the range of the normalized floating point numbers. 
 !     < On exit,
 !     If JOBQ == 'Q', the array F contains the orthogonal 
 !     matrix/factor of the QR factorization of the initial 
 !     data snapshots matrix F. See the description of JOBQ. 
 !     If JOBQ == 'N', the entries in F strictly below the main
 !     diagonal contain, column-wise, the information on the 
 !     Householder vectors, as returned by CGEQRF. The 
 !     remaining information to restore the orthogonal matrix
 !     of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). 
 !     See the description of ZWORK.
 !.....
 !     LDF (input) INTEGER, LDF >= M 
 !     The leading dimension of the array F.
 !.....
 !     X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array
 !     X is used as workspace to hold representations of the
 !     leading N-1 snapshots in the orthonormal basis computed
 !     in the QR factorization of F.
 !     On exit, the leading K columns of X contain the leading
 !     K left singular vectors of the above described content
 !     of X. To lift them to the space of the left singular
 !     vectors U(:,1:K) of the input data, pre-multiply with the 
 !     Q factor from the initial QR factorization. 
 !     See the descriptions of F, K, V  and Z.
 !.....      
 !     LDX (input) INTEGER, LDX >= N  
 !     The leading dimension of the array X. 
 !.....
 !     Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array
 !     Y is used as workspace to hold representations of the
 !     trailing N-1 snapshots in the orthonormal basis computed
 !     in the QR factorization of F.
 !     On exit, 
 !     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
 !     triangular factor from the QR factorization of the data
 !     snapshot matrix F.
 !.....      
 !     LDY (input) INTEGER , LDY >= N
 !     The leading dimension of the array Y.   
 !.....
 !     NRNK (input) INTEGER
 !     Determines the mode how to compute the numerical rank,
 !     i.e. how to truncate small singular values of the input
 !     matrix X. On input, if
 !     NRNK = -1 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(1)
 !                  This option is recommended.
 !     NRNK = -2 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(i-1)
 !                  This option is included for R&D purposes.
 !                  It requires highly accurate SVD, which
 !                  may not be feasible.      
 !     The numerical rank can be enforced by using positive 
 !     value of NRNK as follows: 
 !     0 < NRNK <= N-1 :: at most NRNK largest singular values
 !     will be used. If the number of the computed nonzero
 !     singular values is less than NRNK, then only those
 !     nonzero values will be used and the actually used
 !     dimension is less than NRNK. The actual number of
 !     the nonzero singular values is returned in the variable
 !     K. See the description of K.
 !.....
 !     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
 !     The tolerance for truncating small singular values.
 !     See the description of NRNK.  
 !.....
 !     K (output) INTEGER,  0 <= K <= N 
 !     The dimension of the SVD/POD basis for the leading N-1
 !     data snapshots (columns of F) and the number of the 
 !     computed Ritz pairs. The value of K is determined
 !     according to the rule set by the parameters NRNK and 
 !     TOL. See the descriptions of NRNK and TOL. 
 !.....
 !     EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array
 !     The leading K (K<=N-1) entries of EIGS contain
 !     the computed eigenvalues (Ritz values).
 !     See the descriptions of K, and Z.
 !.....
 !     Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array
 !     If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i)
 !     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
 !     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
 !     Z*V, where Z contains orthonormal matrix (the product of
 !     Q from the initial QR factorization and the SVD/POD_basis
 !     returned by CGEDMD in X) and the second factor (the 
 !     eigenvectors of the Rayleigh quotient) is in the array V, 
 !     as returned by CGEDMD. That is,  X(:,1:K)*V(:,i)
 !     is an eigenvector corresponding to EIGS(i). The columns 
 !     of V(1:K,1:K) are the computed eigenvectors of the 
 !     K-by-K Rayleigh quotient.  
 !     See the descriptions of EIGS, X and V.      
 !.....
 !     LDZ (input) INTEGER , LDZ >= M
 !     The leading dimension of the array Z.
 !.....
 !     RES (output) REAL(KIND=WP) (N-1)-by-1 array
 !     RES(1:K) contains the residuals for the K computed 
 !     Ritz pairs, 
 !     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
 !     See the description of EIGS and Z.      
 !.....
 !     B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array.
 !     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
 !     be used for computing the refined vectors; see further 
 !     details in the provided references. 
 !     If JOBF == 'E', B(1:N,1;K) contains 
 !     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
 !     Exact DMD, up to scaling by the inverse eigenvalues.   
 !     In both cases, the content of B can be lifted to the 
 !     original dimension of the input data by pre-multiplying
 !     with the Q factor from the initial QR factorization. 
 !     Here A denotes a compression of the underlying operator.      
 !     See the descriptions of F and X.
 !     If JOBF =='N', then B is not referenced.
 !.....
 !     LDB (input) INTEGER, LDB >= MIN(M,N)
 !     The leading dimension of the array B.
 !.....
 !     V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
 !     On exit, V(1:K,1:K) V contains the K eigenvectors of
 !     the Rayleigh quotient. The Ritz vectors
 !     (returned in Z) are the product of Q from the initial QR
 !     factorization (see the description of F) X (see the 
 !     description of X) and V.
 !.....
 !     LDV (input) INTEGER, LDV >= N-1
 !     The leading dimension of the array V.
 !.....      
 !     S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
 !     The array S(1:K,1:K) is used for the matrix Rayleigh
 !     quotient. This content is overwritten during
 !     the eigenvalue decomposition by CGEEV.
 !     See the description of K.
 !.....
 !     LDS (input) INTEGER, LDS >= N-1        
 !     The leading dimension of the array S.
 !.....
 !     ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array
 !     On exit, 
 !     ZWORK(1:MIN(M,N)) contains the scalar factors of the 
 !     elementary reflectors as returned by CGEQRF of the 
 !     M-by-N input matrix F.   
 !     If the call to CGEDMDQ is only workspace query, then
 !     ZWORK(1) contains the minimal complex workspace length and
 !     ZWORK(2) is the optimal complex workspace length. 
 !     Hence, the length of work is at least 2.
 !     See the description of LZWORK.      
 !.....      
 !     LZWORK (input) INTEGER
 !     The minimal length of the  workspace vector ZWORK.
 !     LZWORK is calculated as follows:
 !     Let MLWQR  = N (minimal workspace for CGEQRF[M,N])
 !         MLWDMD = minimal workspace for CGEDMD (see the
 !                  description of LWORK in CGEDMD)
 !         MLWMQR = N (minimal workspace for 
 !                    ZUNMQR['L','N',M,N,N])
 !         MLWGQR = N (minimal workspace for ZUNGQR[M,N,N])
 !         MINMN  = MIN(M,N)      
 !     Then
 !     LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD)
 !     is further updated as follows:
 !        if   JOBZ == 'V' or JOBZ == 'F' THEN 
 !             LZWORK = MAX( LZWORK, MINMN+MLWMQR )
 !        if   JOBQ == 'Q' THEN
 !             LZWORK = MAX( ZLWORK, MINMN+MLWGQR)      
 !
 !.....      
 !     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
 !     On exit,
 !     WORK(1:N-1) contains the singular values of 
 !     the input submatrix F(1:M,1:N-1).
 !     If the call to CGEDMDQ is only workspace query, then
 !     WORK(1) contains the minimal workspace length and
 !     WORK(2) is the optimal workspace length. hence, the
 !     length of work is at least 2.
 !     See the description of LWORK.
 !.....
 !     LWORK (input) INTEGER
 !     The minimal length of the  workspace vector WORK.
 !     LWORK is the same as in CGEDMD, because in CGEDMDQ
 !     only CGEDMD requires real workspace for snapshots
 !     of dimensions MIN(M,N)-by-(N-1).
 !     If on entry LWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths for both WORK and
 !     IWORK. See the descriptions of WORK and IWORK.          
 !.....
 !     IWORK (workspace/output) INTEGER LIWORK-by-1 array
 !     Workspace that is required only if WHTSVD equals
 !     2 , 3 or 4. (See the description of WHTSVD).
 !     If on entry LWORK =-1 or LIWORK=-1, then the
 !     minimal length of IWORK is computed and returned in
 !     IWORK(1). See the description of LIWORK.
 !.....
 !     LIWORK (input) INTEGER
 !     The minimal length of the workspace vector IWORK.
 !     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
 !     Let M1=MIN(M,N), N1=N-1. Then      
 !     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
 !     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
 !     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
 !     If on entry LIWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths for both WORK and
 !     IWORK. See the descriptions of WORK and IWORK.
 !..... 
 !     INFO (output) INTEGER
 !     -i < 0 :: On entry, the i-th argument had an
 !               illegal value
 !        = 0 :: Successful return.
 !        = 1 :: Void input. Quick exit (M=0 or N=0).
 !        = 2 :: The SVD computation of X did not converge.
 !               Suggestion: Check the input data and/or
 !               repeat with different WHTSVD.
 !        = 3 :: The computation of the eigenvalues did not
 !               converge.
 !        = 4 :: If data scaling was requested on input and
 !               the procedure found inconsistency in the data
 !               such that for some column index i,
 !               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
 !               to zero if JOBS=='C'. The computation proceeds
 !               with original or modified data and warning
 !               flag is set with INFO=4.  
 !.............................................................
 !.............................................................
 !     Parameters
 !     ~~~~~~~~~~      
      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
 !     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
 !      
 !     Local scalars      
 !     ~~~~~~~~~~~~~
      INTEGER           :: IMINWR, INFO1,  MINMN, MLRWRK,   &
                           MLWDMD, MLWGQR, MLWMQR, MLWORK,  & 
                           MLWQR,  OLWDMD, OLWGQR, OLWMQR,  &
                           OLWORK, OLWQR
      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
                           WNTVCQ, WNTREF, WNTEX
      CHARACTER(LEN=1)  :: JOBVL
 !      
 !     External functions (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~
      LOGICAL       LSAME
      EXTERNAL      LSAME 
 !
 !     External subroutines (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~~~~
      EXTERNAL      CGEQRF, CLACPY, CLASET, CUNGQR, & 
                    CUNMQR, XERBLA

 !     External subroutines
 !     ~~~~~~~~~~~~~~~~~~~~
      EXTERNAL      CGEDMD 
      
 !     Intrinsic functions
 !     ~~~~~~~~~~~~~~~~~~~
      INTRINSIC      MAX, MIN, INT         
 !..........................................................  
 !
 !    Test the input arguments    
      WNTRES = LSAME(JOBR,'R')
      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
      SCCOLY = LSAME(JOBS,'Y')
      WNTVEC = LSAME(JOBZ,'V')
      WNTVCF = LSAME(JOBZ,'F')
      WNTVCQ = LSAME(JOBZ,'Q')
      WNTREF = LSAME(JOBF,'R') 
      WNTEX  = LSAME(JOBF,'E')
      WANTQ  = LSAME(JOBQ,'Q')
      WNTTRF = LSAME(JOBT,'R')     
      MINMN  = MIN(M,N)
      INFO = 0 
      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
 !       
      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
                                  LSAME(JOBS,'N')) )  THEN 
          INFO = -1
      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ       &
                              .OR. LSAME(JOBZ,'N')) ) THEN
          INFO = -2
      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
          INFO = -3
      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
           INFO = -4                 
      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
          INFO = -5
       ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
                LSAME(JOBF,'N') ) )                     THEN
          INFO = -6    
      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
          INFO = -7
      ELSE IF ( M < 0 ) THEN
          INFO = -8
      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
          INFO = -9
      ELSE IF ( LDF < M ) THEN
          INFO = -11
      ELSE IF ( LDX < MINMN ) THEN
          INFO = -13
      ELSE IF ( LDY < MINMN ) THEN
          INFO = -15
      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
          INFO = -16
      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
          INFO = -17
      ELSE IF ( LDZ < M ) THEN
          INFO = -21
      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
          INFO = -24
      ELSE IF ( LDV < N-1 ) THEN
          INFO = -26
      ELSE IF ( LDS < N-1 ) THEN
          INFO = -28
      END IF
 !      
      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
          JOBVL = 'V'
      ELSE
          JOBVL = 'N'
      END IF     
      IF ( INFO == 0 ) THEN  
          ! Compute the minimal and the optimal workspace
          ! requirements. Simulate running the code and 
          ! determine minimal and optimal sizes of the 
          ! workspace at any moment of the run.         
         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
             ! All output except K is void. INFO=1 signals
             ! the void input. In case of a workspace query,
             ! the minimal workspace lengths are returned.
            IF ( LQUERY ) THEN  
               IWORK(1) = 1
                WORK(1) = 2
                WORK(2) = 2
            ELSE                
               K = 0
            END IF             
            INFO = 1  
            RETURN
         END IF     
         
         MLRWRK = 2
         MLWORK = 2
         OLWORK = 2 
         IMINWR = 1
         MLWQR  = MAX(1,N)  ! Minimal workspace length for CGEQRF.
         MLWORK = MAX(MLWORK,MINMN + MLWQR) 

         IF ( LQUERY ) THEN 
             CALL CGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, &
                          INFO1 )
             OLWQR  = INT(ZWORK(1))
             OLWORK = MAX(OLWORK,MINMN + OLWQR)           
         END IF
         CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
                      EIGS, Z, LDZ, RES,  B, LDB, V, LDV,    & 
                      S, LDS, ZWORK, LZWORK, WORK, -1, IWORK,&
                      LIWORK, INFO1 )
         MLWDMD = INT(ZWORK(1))
         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
         MLRWRK = MAX(MLRWRK, INT(WORK(1)))
         IMINWR = MAX(IMINWR, IWORK(1))
         IF ( LQUERY ) THEN 
             OLWDMD = INT(ZWORK(2))
             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
         END IF
         IF ( WNTVEC .OR. WNTVCF ) THEN
            MLWMQR = MAX(1,N) 
            MLWORK = MAX(MLWORK, MINMN+MLWMQR)
            IF ( LQUERY ) THEN
               CALL CUNMQR( 'L','N', M, N, MINMN, F, LDF,  & 
                            ZWORK, Z, LDZ, ZWORK, -1, INFO1 )
               OLWMQR = INT(ZWORK(1))
               OLWORK = MAX(OLWORK, MINMN+OLWMQR)
            END IF
         END IF  
         IF ( WANTQ ) THEN
            MLWGQR = MAX(1,N)
            MLWORK = MAX(MLWORK, MINMN+MLWGQR)
            IF ( LQUERY ) THEN 
                CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, &
                             ZWORK, -1, INFO1 )               
                OLWGQR = INT(ZWORK(1))
                OLWORK = MAX(OLWORK, MINMN+OLWGQR)
            END IF            
         END IF          
         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34
         IF ( LWORK  < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32
         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30
      END IF  
      IF( INFO /= 0 ) THEN
         CALL XERBLA( 'CGEDMDQ', -INFO )
         RETURN
      ELSE IF ( LQUERY ) THEN
 !     Return minimal and optimal workspace sizes
          IWORK(1) = IMINWR
          ZWORK(1) = MLWORK
          ZWORK(2) = OLWORK
          WORK(1)  = MLRWRK
          WORK(2)  = MLRWRK
          RETURN
      END IF   
 !.....	  
 !     Initial QR factorization that is used to represent the
 !     snapshots as elements of lower dimensional subspace.
 !     For large scale computation with M >>N , at this place 
 !     one can use an out of core QRF.
 !   
      CALL CGEQRF( M, N, F, LDF, ZWORK,                & 
                   ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
 !      
 !     Define X and Y as the snapshots representations in the
 !     orthogonal basis computed in the QR factorization.
 !     X corresponds to the leading N-1 and Y to the trailing
 !     N-1 snapshots.
      CALL CLASET( 'L', MINMN, N-1, ZZERO,  ZZERO, X, LDX )
      CALL CLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
      CALL CLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
      IF ( M >= 3 ) THEN
          CALL CLASET( 'L', MINMN-2, N-2, ZZERO,  ZZERO, &
                       Y(3,1), LDY )  
      END IF
 !
 !     Compute the DMD of the projected snapshot pairs (X,Y)   
      CALL CGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, &
                  N-1,  X, LDX, Y, LDY, NRNK,   TOL, K,    &
                  EIGS, Z, LDZ, RES, B,  LDB,   V, LDV,    &
                  S, LDS, ZWORK(MINMN+1), LZWORK-MINMN,    & 
                  WORK,   LWORK, IWORK, LIWORK, INFO1 )
      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
          ! Return with error code. See CGEDMD for details.
          INFO = INFO1
          RETURN
      ELSE
          INFO = INFO1
      END IF    
 !      
 !     The Ritz vectors (Koopman modes) can be explicitly 
 !     formed or returned in factored form.
      IF ( WNTVEC ) THEN
        ! Compute the eigenvectors explicitly.  
        IF ( M > MINMN ) CALL CLASET( 'A', M-MINMN, K, ZZERO, &
                                     ZZERO, Z(MINMN+1,1), LDZ )
        CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z,  &
             LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
      ELSE IF ( WNTVCF ) THEN   
        !   Return the Ritz vectors (eigenvectors) in factored
        !   form Z*V, where Z contains orthonormal matrix (the
        !   product of Q from the initial QR factorization and 
        !   the SVD/POD_basis returned by CGEDMD in X) and the 
        !   second factor (the eigenvectors of the Rayleigh 
        !   quotient) is in the array V, as returned by CGEDMD.
        CALL CLACPY( 'A', N, K, X, LDX, Z, LDZ )
        IF ( M > N ) CALL CLASET( 'A', M-N, K, ZZERO, ZZERO, & 
                                 Z(N+1,1), LDZ )
        CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, &
                    LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
      END IF
 !     
 !     Some optional output variables:
 !
 !     The upper triangular factor R in the initial QR 
 !     factorization is optionally returned in the array Y.
 !     This is useful if this call to CGEDMDQ is to be 
      
 !     followed by a streaming DMD that is implemented in a 
 !     QR compressed form.
      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
         CALL CLASET( 'A', MINMN, N, ZZERO,  ZZERO, Y, LDY )
         CALL CLACPY( 'U', MINMN, N, F, LDF,        Y, LDY )
      END IF    
 !
 !     The orthonormal/unitary factor Q in the initial QR 
 !     factorization is optionally returned in the array F. 
 !     Same as with the triangular factor above, this is 
 !     useful in a streaming DMD.
      IF ( WANTQ ) THEN                   ! Q overwrites F 
         CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK,     &
                      ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )  
      END IF
 !      
      RETURN
 !      
      END SUBROUTINE CGEDMDQ
    
--- a/lapack-netlib/SRC/dgedmd.f90
+++ b/lapack-netlib/SRC/dgedmd.f90
--- a/lapack-netlib/SRC/dgedmdq.f90
+++ b/lapack-netlib/SRC/dgedmdq.f90
@@ -0,0 +1,704 @@
 SUBROUTINE DGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
                    LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
                    S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
 ! March 2023
 !.....
      USE                   iso_fortran_env
      IMPLICIT NONE
      INTEGER, PARAMETER :: WP = real64      
 !.....      
 !     Scalar arguments       
      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
                                JOBT, JOBF
      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
                                LDY, NRNK, LDZ, LDB, LDV,  &
                                LDS, LWORK,  LIWORK
      INTEGER,   INTENT(OUT) :: INFO,    K      
      REAL(KIND=WP), INTENT(IN)    ::   TOL     
 !     Array arguments      
      REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
      REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
                                      Z(LDZ,*), B(LDB,*),  &
                                      V(LDV,*), S(LDS,*)
      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
                                      RES(*)
      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
      INTEGER,       INTENT(OUT)   :: IWORK(*)
 !.....      
 !     Purpose  
 !     =======
 !     DGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
 !     a pair of data snapshot matrices, using a QR factorization
 !     based compression of the data. For the input matrices
 !     X and Y such that Y = A*X with an unaccessible matrix
 !     A, DGEDMDQ computes a certain number of Ritz pairs of A using
 !     the standard Rayleigh-Ritz extraction from a subspace of
 !     range(X) that is determined using the leading left singular 
 !     vectors of X. Optionally, DGEDMDQ returns the residuals 
 !     of the computed Ritz pairs, the information needed for
 !     a refinement of the Ritz vectors, or the eigenvectors of
 !     the Exact DMD.
 !     For further details see the references listed
 !     below. For more details of the implementation see [3].      
 !
 !     References
 !     ==========
 !     [1] P. Schmid: Dynamic mode decomposition of numerical
 !         and experimental data,
 !         Journal of Fluid Mechanics 656, 5-28, 2010.
 !     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
 !         decompositions: analysis and enhancements,
 !         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
 !     [3] Z. Drmac: A LAPACK implementation of the Dynamic
 !         Mode Decomposition I. Technical report. AIMDyn Inc.
 !         and LAPACK Working Note 298.      
 !     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
 !         Brunton, N. Kutz: On Dynamic Mode Decomposition:
 !         Theory and Applications, Journal of Computational
 !         Dynamics 1(2), 391 -421, 2014.
 !
 !     Developed and supported by:
 !     ===========================
 !     Developed and coded by Zlatko Drmac, Faculty of Science,
 !     University of Zagreb;  drmac@math.hr
 !     In cooperation with
 !     AIMdyn Inc., Santa Barbara, CA.
 !     and supported by
 !     - DARPA SBIR project "Koopman Operator-Based Forecasting
 !     for Nonstationary Processes from Near-Term, Limited
 !     Observational Data" Contract No: W31P4Q-21-C-0007
 !     - DARPA PAI project "Physics-Informed Machine Learning
 !     Methodologies" Contract No: HR0011-18-9-0033
 !     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
 !     Framework for Space-Time Analysis of Process Dynamics"
 !     Contract No: HR0011-16-C-0116
 !     Any opinions, findings and conclusions or recommendations 
 !     expressed in this material are those of the author and 
 !     do not necessarily reflect the views of the DARPA SBIR 
 !     Program Office.      
 !============================================================
 !     Distribution Statement A: 
 !     Approved for Public Release, Distribution Unlimited.
 !     Cleared by DARPA on September 29, 2022  
 !============================================================      
 !......................................................................      
 !     Arguments
 !     =========
 !     JOBS (input) CHARACTER*1
 !     Determines whether the initial data snapshots are scaled
 !     by a diagonal matrix. The data snapshots are the columns
 !     of F. The leading N-1 columns of F are denoted X and the
 !     trailing N-1 columns are denoted Y. 
 !     'S' :: The data snapshots matrices X and Y are multiplied
 !            with a diagonal matrix D so that X*D has unit
 !            nonzero columns (in the Euclidean 2-norm)
 !     'C' :: The snapshots are scaled as with the 'S' option.
 !            If it is found that an i-th column of X is zero
 !            vector and the corresponding i-th column of Y is
 !            non-zero, then the i-th column of Y is set to
 !            zero and a warning flag is raised.
 !     'Y' :: The data snapshots matrices X and Y are multiplied
 !            by a diagonal matrix D so that Y*D has unit
 !            nonzero columns (in the Euclidean 2-norm)    
 !     'N' :: No data scaling.   
 !.....
 !     JOBZ (input) CHARACTER*1
 !     Determines whether the eigenvectors (Koopman modes) will
 !     be computed.
 !     'V' :: The eigenvectors (Koopman modes) will be computed
 !            and returned in the matrix Z.
 !            See the description of Z.
 !     'F' :: The eigenvectors (Koopman modes) will be returned
 !            in factored form as the product Z*V, where Z
 !            is orthonormal and V contains the eigenvectors
 !            of the corresponding Rayleigh quotient.
 !            See the descriptions of F, V, Z.
 !     'Q' :: The eigenvectors (Koopman modes) will be returned
 !            in factored form as the product Q*Z, where Z
 !            contains the eigenvectors of the compression of the
 !            underlying discretized operator onto the span of
 !            the data snapshots. See the descriptions of F, V, Z.  
 !            Q is from the initial QR factorization.      
 !     'N' :: The eigenvectors are not computed.  
 !.....      
 !     JOBR (input) CHARACTER*1 
 !     Determines whether to compute the residuals.
 !     'R' :: The residuals for the computed eigenpairs will
 !            be computed and stored in the array RES.
 !            See the description of RES.
 !            For this option to be legal, JOBZ must be 'V'.
 !     'N' :: The residuals are not computed.
 !.....
 !     JOBQ (input) CHARACTER*1 
 !     Specifies whether to explicitly compute and return the
 !     orthogonal matrix from the QR factorization.
 !     'Q' :: The matrix Q of the QR factorization of the data
 !            snapshot matrix is computed and stored in the
 !            array F. See the description of F.       
 !     'N' :: The matrix Q is not explicitly computed.
 !.....
 !     JOBT (input) CHARACTER*1 
 !     Specifies whether to return the upper triangular factor
 !     from the QR factorization.
 !     'R' :: The matrix R of the QR factorization of the data 
 !            snapshot matrix F is returned in the array Y.
 !            See the description of Y and Further details.       
 !     'N' :: The matrix R is not returned.    
 !.....
 !     JOBF (input) CHARACTER*1
 !     Specifies whether to store information needed for post-
 !     processing (e.g. computing refined Ritz vectors)
 !     'R' :: The matrix needed for the refinement of the Ritz
 !            vectors is computed and stored in the array B.
 !            See the description of B.
 !     'E' :: The unscaled eigenvectors of the Exact DMD are 
 !            computed and returned in the array B. See the
 !            description of B.
 !     'N' :: No eigenvector refinement data is computed.
 !     To be useful on exit, this option needs JOBQ='Q'.
 !.....      
 !     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
 !     Allows for a selection of the SVD algorithm from the
 !     LAPACK library.
 !     1 :: DGESVD (the QR SVD algorithm)
 !     2 :: DGESDD (the Divide and Conquer algorithm; if enough
 !          workspace available, this is the fastest option)
 !     3 :: DGESVDQ (the preconditioned QR SVD  ; this and 4
 !          are the most accurate options)
 !     4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3
 !          are the most accurate options)
 !     For the four methods above, a significant difference in
 !     the accuracy of small singular values is possible if
 !     the snapshots vary in norm so that X is severely
 !     ill-conditioned. If small (smaller than EPS*||X||)
 !     singular values are of interest and JOBS=='N',  then
 !     the options (3, 4) give the most accurate results, where
 !     the option 4 is slightly better and with stronger 
 !     theoretical background.
 !     If JOBS=='S', i.e. the columns of X will be normalized,
 !     then all methods give nearly equally accurate results.
 !.....
 !     M (input) INTEGER, M >= 0 
 !     The state space dimension (the number of rows of F).
 !.....      
 !     N (input) INTEGER, 0 <= N <= M
 !     The number of data snapshots from a single trajectory,
 !     taken at equidistant discrete times. This is the 
 !     number of columns of F.
 !.....
 !     F (input/output) REAL(KIND=WP) M-by-N array
 !     > On entry,
 !     the columns of F are the sequence of data snapshots 
 !     from a single trajectory, taken at equidistant discrete
 !     times. It is assumed that the column norms of F are 
 !     in the range of the normalized floating point numbers. 
 !     < On exit,
 !     If JOBQ == 'Q', the array F contains the orthogonal 
 !     matrix/factor of the QR factorization of the initial 
 !     data snapshots matrix F. See the description of JOBQ. 
 !     If JOBQ == 'N', the entries in F strictly below the main
 !     diagonal contain, column-wise, the information on the 
 !     Householder vectors, as returned by DGEQRF. The 
 !     remaining information to restore the orthogonal matrix
 !     of the initial QR factorization is stored in WORK(1:N). 
 !     See the description of WORK.
 !.....
 !     LDF (input) INTEGER, LDF >= M 
 !     The leading dimension of the array F.
 !.....
 !     X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
 !     X is used as workspace to hold representations of the
 !     leading N-1 snapshots in the orthonormal basis computed
 !     in the QR factorization of F.
 !     On exit, the leading K columns of X contain the leading
 !     K left singular vectors of the above described content
 !     of X. To lift them to the space of the left singular
 !     vectors U(:,1:K)of the input data, pre-multiply with the 
 !     Q factor from the initial QR factorization. 
 !     See the descriptions of F, K, V  and Z.
 !.....      
 !     LDX (input) INTEGER, LDX >= N  
 !     The leading dimension of the array X. 
 !.....
 !     Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
 !     Y is used as workspace to hold representations of the
 !     trailing N-1 snapshots in the orthonormal basis computed
 !     in the QR factorization of F.
 !     On exit, 
 !     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
 !     triangular factor from the QR factorization of the data
 !     snapshot matrix F.
 !.....      
 !     LDY (input) INTEGER , LDY >= N
 !     The leading dimension of the array Y.   
 !.....
 !     NRNK (input) INTEGER
 !     Determines the mode how to compute the numerical rank,
 !     i.e. how to truncate small singular values of the input
 !     matrix X. On input, if
 !     NRNK = -1 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(1)
 !                  This option is recommended.  
 !     NRNK = -2 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(i-1)
 !                  This option is included for R&D purposes.
 !                  It requires highly accurate SVD, which
 !                  may not be feasible.       
 !     The numerical rank can be enforced by using positive 
 !     value of NRNK as follows: 
 !     0 < NRNK <= N-1 :: at most NRNK largest singular values
 !     will be used. If the number of the computed nonzero
 !     singular values is less than NRNK, then only those
 !     nonzero values will be used and the actually used
 !     dimension is less than NRNK. The actual number of
 !     the nonzero singular values is returned in the variable
 !     K. See the description of K.
 !.....
 !     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
 !     The tolerance for truncating small singular values.
 !     See the description of NRNK.  
 !.....
 !     K (output) INTEGER,  0 <= K <= N 
 !     The dimension of the SVD/POD basis for the leading N-1
 !     data snapshots (columns of F) and the number of the 
 !     computed Ritz pairs. The value of K is determined
 !     according to the rule set by the parameters NRNK and 
 !     TOL. See the descriptions of NRNK and TOL. 
 !.....
 !     REIG (output) REAL(KIND=WP) (N-1)-by-1 array
 !     The leading K (K<=N) entries of REIG contain
 !     the real parts of the computed eigenvalues
 !     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
 !     See the descriptions of K, IMEIG, Z.
 !.....
 !     IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array
 !     The leading K (K<N) entries of REIG contain
 !     the imaginary parts of the computed eigenvalues
 !     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
 !     The eigenvalues are determined as follows:
 !     If IMEIG(i) == 0, then the corresponding eigenvalue is
 !     real, LAMBDA(i) = REIG(i).
 !     If IMEIG(i)>0, then the corresponding complex
 !     conjugate pair of eigenvalues reads
 !     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
 !     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
 !     That is, complex conjugate pairs have consequtive
 !     indices (i,i+1), with the positive imaginary part
 !     listed first.
 !     See the descriptions of K, REIG, Z.     
 !.....      
 !     Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array
 !     If JOBZ =='V' then
 !        Z contains real Ritz vectors as follows:
 !        If IMEIG(i)=0, then Z(:,i) is an eigenvector of
 !        the i-th Ritz value.
 !        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
 !        [Z(:,i) Z(:,i+1)] span an invariant subspace and
 !        the Ritz values extracted from this subspace are
 !        REIG(i) + sqrt(-1)*IMEIG(i) and
 !        REIG(i) - sqrt(-1)*IMEIG(i).
 !        The corresponding eigenvectors are
 !        Z(:,i) + sqrt(-1)*Z(:,i+1) and
 !        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
 !     If JOBZ == 'F', then the above descriptions hold for
 !     the columns of Z*V, where the columns of V are the
 !     eigenvectors of the K-by-K Rayleigh quotient, and Z is
 !     orthonormal. The columns of V are similarly structured:
 !     If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if 
 !     IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and
 !                       Z*V(:,i)-sqrt(-1)*Z*V(:,i+1)
 !     are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
 !     See the descriptions of REIG, IMEIG, X and V.
 !.....
 !     LDZ (input) INTEGER , LDZ >= M
 !     The leading dimension of the array Z.
 !.....
 !     RES (output) REAL(KIND=WP) (N-1)-by-1 array
 !     RES(1:K) contains the residuals for the K computed 
 !     Ritz pairs.       
 !     If LAMBDA(i) is real, then
 !        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
 !     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
 !     then
 !     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
 !     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
 !               [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
 !     It holds that
 !     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
 !     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
 !     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
 !           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
 !     See the description of Z.
 !.....
 !     B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array.
 !     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
 !     be used for computing the refined vectors; see further 
 !     details in the provided references. 
 !     If JOBF == 'E', B(1:N,1;K) contains 
 !     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
 !     Exact DMD, up to scaling by the inverse eigenvalues.   
 !     In both cases, the content of B can be lifted to the 
 !     original dimension of the input data by pre-multiplying
 !     with the Q factor from the initial QR factorization.
 !     Here A denotes a compression of the underlying operator.
 !     See the descriptions of F and X.
 !     If JOBF =='N', then B is not referenced.
 !.....
 !     LDB (input) INTEGER, LDB >= MIN(M,N)
 !     The leading dimension of the array B.
 !.....      
 !     V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array
 !     On exit, V(1:K,1:K) contains the K eigenvectors of
 !     the Rayleigh quotient. The eigenvectors of a complex
 !     conjugate pair of eigenvalues are returned in real form
 !     as explained in the description of Z. The Ritz vectors
 !     (returned in Z) are the product of X and V; see
 !     the descriptions of X and Z.
 !.....
 !     LDV (input) INTEGER, LDV >= N-1
 !     The leading dimension of the array V.
 !.....      
 !     S (output) REAL(KIND=WP) (N-1)-by-(N-1) array
 !     The array S(1:K,1:K) is used for the matrix Rayleigh
 !     quotient. This content is overwritten during
 !     the eigenvalue decomposition by DGEEV.
 !     See the description of K.
 !.....
 !     LDS (input) INTEGER, LDS >= N-1        
 !     The leading dimension of the array S.
 !.....
 !     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
 !     On exit, 
 !     WORK(1:MIN(M,N)) contains the scalar factors of the 
 !     elementary reflectors as returned by DGEQRF of the 
 !     M-by-N input matrix F.
 !     WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of 
 !     the input submatrix F(1:M,1:N-1).
 !     If the call to DGEDMDQ is only workspace query, then
 !     WORK(1) contains the minimal workspace length and
 !     WORK(2) is the optimal workspace length. Hence, the
 !     length of work is at least 2.
 !     See the description of LWORK.
 !.....
 !     LWORK (input) INTEGER
 !     The minimal length of the  workspace vector WORK.
 !     LWORK is calculated as follows:
 !     Let MLWQR  = N (minimal workspace for DGEQRF[M,N])
 !         MLWDMD = minimal workspace for DGEDMD (see the
 !                  description of LWORK in DGEDMD) for 
 !                  snapshots of dimensions MIN(M,N)-by-(N-1)
 !         MLWMQR = N (minimal workspace for 
 !                    DORMQR['L','N',M,N,N])
 !         MLWGQR = N (minimal workspace for DORGQR[M,N,N])
 !     Then
 !     LWORK = MAX(N+MLWQR, N+MLWDMD)
 !     is updated as follows:
 !        if   JOBZ == 'V' or JOBZ == 'F' THEN 
 !             LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWMQR )
 !        if   JOBQ == 'Q' THEN
 !             LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWGQR)
 !     If on entry LWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths for both WORK and
 !     IWORK. See the descriptions of WORK and IWORK.          
 !.....
 !     IWORK (workspace/output) INTEGER LIWORK-by-1 array
 !     Workspace that is required only if WHTSVD equals
 !     2 , 3 or 4. (See the description of WHTSVD).
 !     If on entry LWORK =-1 or LIWORK=-1, then the
 !     minimal length of IWORK is computed and returned in
 !     IWORK(1). See the description of LIWORK.
 !.....
 !     LIWORK (input) INTEGER
 !     The minimal length of the workspace vector IWORK.
 !     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
 !     Let M1=MIN(M,N), N1=N-1. Then    
 !     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
 !     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
 !     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
 !     If on entry LIWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths for both WORK and
 !     IWORK. See the descriptions of WORK and IWORK.
 !..... 
 !     INFO (output) INTEGER
 !     -i < 0 :: On entry, the i-th argument had an
 !               illegal value
 !        = 0 :: Successful return.
 !        = 1 :: Void input. Quick exit (M=0 or N=0).
 !        = 2 :: The SVD computation of X did not converge.
 !               Suggestion: Check the input data and/or
 !               repeat with different WHTSVD.
 !        = 3 :: The computation of the eigenvalues did not
 !               converge.
 !        = 4 :: If data scaling was requested on input and
 !               the procedure found inconsistency in the data
 !               such that for some column index i,
 !               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
 !               to zero if JOBS=='C'. The computation proceeds
 !               with original or modified data and warning
 !               flag is set with INFO=4.  
 !.............................................................
 !.............................................................
 !     Parameters
 !     ~~~~~~~~~~      
      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
 !      
 !     Local scalars      
 !     ~~~~~~~~~~~~~
      INTEGER           :: IMINWR, INFO1,  MLWDMD, MLWGQR, &
                           MLWMQR, MLWORK, MLWQR,  MINMN,  & 
                           OLWDMD, OLWGQR, OLWMQR, OLWORK, &
                           OLWQR
      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
                           WNTVCQ, WNTREF, WNTEX
      CHARACTER(LEN=1)  :: JOBVL
 !      
 !     Local array      
 !     ~~~~~~~~~~~      
      REAL(KIND=WP) :: RDUMMY(2)
 !      
 !     External functions (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~
      LOGICAL       LSAME
      EXTERNAL      LSAME 
 !
 !     External subroutines (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~~~~
      EXTERNAL      DGEMM 
      EXTERNAL      DGEQRF, DLACPY, DLASET, DORGQR, & 
                    DORMQR, XERBLA

 !     External subroutines
 !     ~~~~~~~~~~~~~~~~~~~~
      EXTERNAL      DGEDMD 
      
 !     Intrinsic functions
 !     ~~~~~~~~~~~~~~~~~~~
      INTRINSIC      MAX, MIN, INT         
 !..........................................................  
 !
 !    Test the input arguments    
      WNTRES = LSAME(JOBR,'R')
      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
      SCCOLY = LSAME(JOBS,'Y')
      WNTVEC = LSAME(JOBZ,'V')
      WNTVCF = LSAME(JOBZ,'F')
      WNTVCQ = LSAME(JOBZ,'Q')
      WNTREF = LSAME(JOBF,'R') 
      WNTEX  = LSAME(JOBF,'E')
      WANTQ  = LSAME(JOBQ,'Q')
      WNTTRF = LSAME(JOBT,'R')     
      MINMN  = MIN(M,N)
      INFO = 0 
      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
 !       
      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
                                  LSAME(JOBS,'N')) )  THEN 
          INFO = -1
      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ    &
                              .OR. LSAME(JOBZ,'N')) ) THEN
          INFO = -2
      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
          INFO = -3
      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
          INFO = -4                 
      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
          INFO = -5
      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
                LSAME(JOBF,'N') ) )                    THEN
          INFO = -6    
      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
          INFO = -7
      ELSE IF ( M < 0 ) THEN
          INFO = -8
      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
          INFO = -9
      ELSE IF ( LDF < M ) THEN
          INFO = -11
      ELSE IF ( LDX < MINMN ) THEN
          INFO = -13
      ELSE IF ( LDY < MINMN ) THEN
          INFO = -15
      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
          INFO = -16
      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
          INFO = -17
      ELSE IF ( LDZ < M ) THEN
          INFO = -22
      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
          INFO = -25
      ELSE IF ( LDV < N-1 ) THEN
          INFO = -27
      ELSE IF ( LDS < N-1 ) THEN
          INFO = -29
      END IF
 !      
      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
          JOBVL = 'V'
      ELSE
          JOBVL = 'N'
      END IF     
      IF ( INFO == 0 ) THEN  
          ! Compute the minimal and the optimal workspace
          ! requirements. Simulate running the code and 
          ! determine minimal and optimal sizes of the 
          ! workspace at any moment of the run.         
         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
             ! All output except K is void. INFO=1 signals
             ! the void input. In case of a workspace query,
             ! the minimal workspace lengths are returned.
            IF ( LQUERY ) THEN  
               IWORK(1) = 1
                WORK(1) = 2
                WORK(2) = 2
            ELSE                
               K = 0
            END IF             
            INFO = 1  
            RETURN
         END IF     
         MLWQR  = MAX(1,N)  ! Minimal workspace length for DGEQRF.
         MLWORK = MINMN + MLWQR 
         IF ( LQUERY ) THEN 
             CALL DGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, &
                          INFO1 )
             OLWQR  = INT(RDUMMY(1))
             OLWORK = MIN(M,N) + OLWQR           
         END IF
         CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
                      REIG, IMEIG, Z, LDZ, RES,  B, LDB,     & 
                      V, LDV, S, LDS, WORK, -1, IWORK,       &
                      LIWORK, INFO1 )
         MLWDMD = INT(WORK(1))
         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
         IMINWR = IWORK(1)
         IF ( LQUERY ) THEN 
             OLWDMD = INT(WORK(2))
             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
         END IF
         IF ( WNTVEC .OR. WNTVCF ) THEN
            MLWMQR = MAX(1,N) 
            MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR)
            IF ( LQUERY ) THEN
               CALL DORMQR( 'L','N', M, N, MINMN, F, LDF,  & 
                            WORK, Z, LDZ, WORK, -1, INFO1 )
               OLWMQR = INT(WORK(1))
               OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR)
            END IF
         END IF  
         IF ( WANTQ ) THEN
            MLWGQR = N
            MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR)
            IF ( LQUERY ) THEN 
                CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, &
                             WORK, -1, INFO1 )        
                OLWGQR = INT(WORK(1))
                OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR)
            END IF            
         END IF   
         IMINWR = MAX( 1, IMINWR )
         MLWORK = MAX( 2, MLWORK )      
         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31
         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33
      END IF  
      IF( INFO /= 0 ) THEN
         CALL XERBLA( 'DGEDMDQ', -INFO )
         RETURN
      ELSE IF ( LQUERY ) THEN
 !     Return minimal and optimal workspace sizes
          IWORK(1) = IMINWR
          WORK(1)  = MLWORK
          WORK(2)  = OLWORK
          RETURN
      END IF   
 !.....	  
 !     Initial QR factorization that is used to represent the
 !     snapshots as elements of lower dimensional subspace.
 !     For large scale computation with M >>N , at this place 
 !     one can use an out of core QRF.
 !   
      CALL DGEQRF( M, N, F, LDF, WORK,               & 
                   WORK(MINMN+1), LWORK-MINMN, INFO1 )
 !      
 !     Define X and Y as the snapshots representations in the
 !     orthogonal basis computed in the QR factorization.
 !     X corresponds to the leading N-1 and Y to the trailing
 !     N-1 snapshots.
      CALL DLASET( 'L', MINMN, N-1, ZERO,  ZERO, X, LDX )
      CALL DLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
      CALL DLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
      IF ( M >= 3 ) THEN
          CALL DLASET( 'L', MINMN-2, N-2, ZERO,  ZERO, &
                       Y(3,1), LDY )  
      END IF
 !
 !     Compute the DMD of the projected snapshot pairs (X,Y)   
      CALL DGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,  &
                   N-1, X, LDX, Y, LDY, NRNK,   TOL, K,     &
                   REIG, IMEIG, Z, LDZ, RES, B, LDB, V,     &
                   LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, & 
                   IWORK, LIWORK, INFO1 )
      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
          ! Return with error code. See DGEDMD for details.
          INFO = INFO1
          RETURN
      ELSE
          INFO = INFO1
      END IF    
 !      
 !     The Ritz vectors (Koopman modes) can be explicitly 
 !     formed or returned in factored form.
      IF ( WNTVEC ) THEN
        ! Compute the eigenvectors explicitly.  
        IF ( M > MINMN ) CALL DLASET( 'A', M-MINMN, K, ZERO, &
                                     ZERO, Z(MINMN+1,1), LDZ )
        CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
      ELSE IF ( WNTVCF ) THEN   
        !   Return the Ritz vectors (eigenvectors) in factored
        !   form Z*V, where Z contains orthonormal matrix (the
        !   product of Q from the initial QR factorization and 
        !   the SVD/POD_basis returned by DGEDMD in X) and the 
        !   second factor (the eigenvectors of the Rayleigh 
        !   quotient) is in the array V, as returned by DGEDMD.
        CALL DLACPY( 'A', N, K, X, LDX, Z, LDZ )
        IF ( M > N ) CALL DLASET( 'A', M-N, K, ZERO, ZERO,   & 
                                  Z(N+1,1), LDZ )
        CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
      END IF
 !     
 !     Some optional output variables:
 !
 !     The upper triangular factor R in the initial QR 
 !     factorization is optionally returned in the array Y.
 !     This is useful if this call to DGEDMDQ is to be 
 !     followed by a streaming DMD that is implemented in a 
 !     QR compressed form.
      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
         CALL DLASET( 'A', MINMN, N, ZERO,  ZERO, Y, LDY )
         CALL DLACPY( 'U', MINMN, N, F, LDF,      Y, LDY )
      END IF    
 !
 !     The orthonormal/orthogonal factor Q in the initial QR 
 !     factorization is optionally returned in the array F. 
 !     Same as with the triangular factor above, this is 
 !     useful in a streaming DMD.
      IF ( WANTQ ) THEN               ! Q overwrites F 
         CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, &
              WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )  
      END IF
 !      
      RETURN
 !      
      END SUBROUTINE DGEDMDQ
    
--- a/lapack-netlib/SRC/sgedmd.f90
+++ b/lapack-netlib/SRC/sgedmd.f90
--- a/lapack-netlib/SRC/sgedmdq.f90
+++ b/lapack-netlib/SRC/sgedmdq.f90
@@ -0,0 +1,703 @@
 SUBROUTINE SGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
                    LDY,   NRNK,  TOL,   K,  REIG, IMEIG,  &
                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
                    S, LDS, WORK, LWORK, IWORK, LIWORK, INFO )
 ! March 2023
 !.....
      USE                   iso_fortran_env 
      IMPLICIT NONE
      INTEGER, PARAMETER :: WP = real32     
 !.....      
 !     Scalar arguments       
      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
                                JOBT, JOBF
      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
                                LDY, NRNK, LDZ, LDB, LDV,  &
                                LDS, LWORK,  LIWORK
      INTEGER,   INTENT(OUT) :: INFO,   K      
      REAL(KIND=WP), INTENT(IN)    ::   TOL     
 !     Array arguments      
      REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*)
      REAL(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*),  &
                                      Z(LDZ,*), B(LDB,*),  &
                                      V(LDV,*), S(LDS,*)
      REAL(KIND=WP), INTENT(OUT)   :: REIG(*),  IMEIG(*),  &
                                      RES(*)
      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
      INTEGER,       INTENT(OUT)   :: IWORK(*)
 !.....      
 !     Purpose  
 !     =======
 !     SGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
 !     a pair of data snapshot matrices, using a QR factorization
 !     based compression of the data. For the input matrices
 !     X and Y such that Y = A*X with an unaccessible matrix
 !     A, SGEDMDQ computes a certain number of Ritz pairs of A using
 !     the standard Rayleigh-Ritz extraction from a subspace of
 !     range(X) that is determined using the leading left singular 
 !     vectors of X. Optionally, SGEDMDQ returns the residuals 
 !     of the computed Ritz pairs, the information needed for
 !     a refinement of the Ritz vectors, or the eigenvectors of
 !     the Exact DMD.
 !     For further details see the references listed
 !     below. For more details of the implementation see [3].      
 !
 !     References
 !     ==========
 !     [1] P. Schmid: Dynamic mode decomposition of numerical
 !         and experimental data,
 !         Journal of Fluid Mechanics 656, 5-28, 2010.
 !     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
 !         decompositions: analysis and enhancements,
 !         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
 !     [3] Z. Drmac: A LAPACK implementation of the Dynamic
 !         Mode Decomposition I. Technical report. AIMDyn Inc.
 !         and LAPACK Working Note 298.      
 !     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
 !         Brunton, N. Kutz: On Dynamic Mode Decomposition:
 !         Theory and Applications, Journal of Computational
 !         Dynamics 1(2), 391 -421, 2014.
 !
 !     Developed and supported by:
 !     ===========================
 !     Developed and coded by Zlatko Drmac, Faculty of Science,
 !     University of Zagreb;  drmac@math.hr
 !     In cooperation with
 !     AIMdyn Inc., Santa Barbara, CA.
 !     and supported by
 !     - DARPA SBIR project "Koopman Operator-Based Forecasting
 !     for Nonstationary Processes from Near-Term, Limited
 !     Observational Data" Contract No: W31P4Q-21-C-0007
 !     - DARPA PAI project "Physics-Informed Machine Learning
 !     Methodologies" Contract No: HR0011-18-9-0033
 !     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
 !     Framework for Space-Time Analysis of Process Dynamics"
 !     Contract No: HR0011-16-C-0116
 !     Any opinions, findings and conclusions or recommendations 
 !     expressed in this material are those of the author and 
 !     do not necessarily reflect the views of the DARPA SBIR 
 !     Program Office.      
 !============================================================
 !     Distribution Statement A: 
 !     Approved for Public Release, Distribution Unlimited.
 !     Cleared by DARPA on September 29, 2022
 !============================================================      
 !......................................................................      
 !     Arguments
 !     =========
 !     JOBS (input) CHARACTER*1
 !     Determines whether the initial data snapshots are scaled
 !     by a diagonal matrix. The data snapshots are the columns
 !     of F. The leading N-1 columns of F are denoted X and the
 !     trailing N-1 columns are denoted Y. 
 !     'S' :: The data snapshots matrices X and Y are multiplied
 !            with a diagonal matrix D so that X*D has unit
 !            nonzero columns (in the Euclidean 2-norm)
 !     'C' :: The snapshots are scaled as with the 'S' option.
 !            If it is found that an i-th column of X is zero
 !            vector and the corresponding i-th column of Y is
 !            non-zero, then the i-th column of Y is set to
 !            zero and a warning flag is raised.
 !     'Y' :: The data snapshots matrices X and Y are multiplied
 !            by a diagonal matrix D so that Y*D has unit
 !            nonzero columns (in the Euclidean 2-norm)    
 !     'N' :: No data scaling.   
 !.....
 !     JOBZ (input) CHARACTER*1
 !     Determines whether the eigenvectors (Koopman modes) will
 !     be computed.
 !     'V' :: The eigenvectors (Koopman modes) will be computed
 !            and returned in the matrix Z.
 !            See the description of Z.
 !     'F' :: The eigenvectors (Koopman modes) will be returned
 !            in factored form as the product Z*V, where Z
 !            is orthonormal and V contains the eigenvectors
 !            of the corresponding Rayleigh quotient.
 !            See the descriptions of F, V, Z.
 !     'Q' :: The eigenvectors (Koopman modes) will be returned
 !            in factored form as the product Q*Z, where Z
 !            contains the eigenvectors of the compression of the
 !            underlying discretized operator onto the span of
 !            the data snapshots. See the descriptions of F, V, Z. 
 !            Q is from the initial QR factorization.  
 !     'N' :: The eigenvectors are not computed.  
 !.....      
 !     JOBR (input) CHARACTER*1 
 !     Determines whether to compute the residuals.
 !     'R' :: The residuals for the computed eigenpairs will
 !            be computed and stored in the array RES.
 !            See the description of RES.
 !            For this option to be legal, JOBZ must be 'V'.
 !     'N' :: The residuals are not computed.
 !.....
 !     JOBQ (input) CHARACTER*1 
 !     Specifies whether to explicitly compute and return the
 !     orthogonal matrix from the QR factorization.
 !     'Q' :: The matrix Q of the QR factorization of the data
 !            snapshot matrix is computed and stored in the
 !            array F. See the description of F.       
 !     'N' :: The matrix Q is not explicitly computed.
 !.....
 !     JOBT (input) CHARACTER*1 
 !     Specifies whether to return the upper triangular factor
 !     from the QR factorization.
 !     'R' :: The matrix R of the QR factorization of the data 
 !            snapshot matrix F is returned in the array Y.
 !            See the description of Y and Further details.       
 !     'N' :: The matrix R is not returned.    
 !.....
 !     JOBF (input) CHARACTER*1
 !     Specifies whether to store information needed for post-
 !     processing (e.g. computing refined Ritz vectors)
 !     'R' :: The matrix needed for the refinement of the Ritz
 !            vectors is computed and stored in the array B.
 !            See the description of B.
 !     'E' :: The unscaled eigenvectors of the Exact DMD are 
 !            computed and returned in the array B. See the
 !            description of B.
 !     'N' :: No eigenvector refinement data is computed.   
 !     To be useful on exit, this option needs JOBQ='Q'.      
 !.....
 !     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
 !     Allows for a selection of the SVD algorithm from the
 !     LAPACK library.
 !     1 :: SGESVD (the QR SVD algorithm)
 !     2 :: SGESDD (the Divide and Conquer algorithm; if enough
 !          workspace available, this is the fastest option)
 !     3 :: SGESVDQ (the preconditioned QR SVD  ; this and 4
 !          are the most accurate options)
 !     4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3
 !          are the most accurate options)
 !     For the four methods above, a significant difference in
 !     the accuracy of small singular values is possible if
 !     the snapshots vary in norm so that X is severely
 !     ill-conditioned. If small (smaller than EPS*||X||)
 !     singular values are of interest and JOBS=='N',  then
 !     the options (3, 4) give the most accurate results, where
 !     the option 4 is slightly better and with stronger 
 !     theoretical background.
 !     If JOBS=='S', i.e. the columns of X will be normalized,
 !     then all methods give nearly equally accurate results.
 !.....
 !     M (input) INTEGER, M >= 0 
 !     The state space dimension (the number of rows of F)
 !.....      
 !     N (input) INTEGER, 0 <= N <= M
 !     The number of data snapshots from a single trajectory,
 !     taken at equidistant discrete times. This is the 
 !     number of columns of F.
 !.....
 !     F (input/output) REAL(KIND=WP) M-by-N array
 !     > On entry,
 !     the columns of F are the sequence of data snapshots 
 !     from a single trajectory, taken at equidistant discrete
 !     times. It is assumed that the column norms of F are 
 !     in the range of the normalized floating point numbers. 
 !     < On exit,
 !     If JOBQ == 'Q', the array F contains the orthogonal 
 !     matrix/factor of the QR factorization of the initial 
 !     data snapshots matrix F. See the description of JOBQ. 
 !     If JOBQ == 'N', the entries in F strictly below the main
 !     diagonal contain, column-wise, the information on the 
 !     Householder vectors, as returned by SGEQRF. The 
 !     remaining information to restore the orthogonal matrix
 !     of the initial QR factorization is stored in WORK(1:N). 
 !     See the description of WORK.
 !.....
 !     LDF (input) INTEGER, LDF >= M 
 !     The leading dimension of the array F.
 !.....
 !     X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
 !     X is used as workspace to hold representations of the
 !     leading N-1 snapshots in the orthonormal basis computed
 !     in the QR factorization of F.
 !     On exit, the leading K columns of X contain the leading
 !     K left singular vectors of the above described content
 !     of X. To lift them to the space of the left singular
 !     vectors U(:,1:K)of the input data, pre-multiply with the 
 !     Q factor from the initial QR factorization. 
 !     See the descriptions of F, K, V  and Z.
 !.....      
 !     LDX (input) INTEGER, LDX >= N  
 !     The leading dimension of the array X 
 !.....
 !     Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array
 !     Y is used as workspace to hold representations of the
 !     trailing N-1 snapshots in the orthonormal basis computed
 !     in the QR factorization of F.
 !     On exit, 
 !     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
 !     triangular factor from the QR factorization of the data
 !     snapshot matrix F.
 !.....      
 !     LDY (input) INTEGER , LDY >= N
 !     The leading dimension of the array Y   
 !.....
 !     NRNK (input) INTEGER
 !     Determines the mode how to compute the numerical rank,
 !     i.e. how to truncate small singular values of the input
 !     matrix X. On input, if
 !     NRNK = -1 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(1)
 !                  This option is recommended.   
 !     NRNK = -2 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(i-1)
 !                  This option is included for R&D purposes.
 !                  It requires highly accurate SVD, which
 !                  may not be feasible.     
 !     The numerical rank can be enforced by using positive 
 !     value of NRNK as follows: 
 !     0 < NRNK <= N-1 :: at most NRNK largest singular values
 !     will be used. If the number of the computed nonzero
 !     singular values is less than NRNK, then only those
 !     nonzero values will be used and the actually used
 !     dimension is less than NRNK. The actual number of
 !     the nonzero singular values is returned in the variable
 !     K. See the description of K.
 !.....
 !     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
 !     The tolerance for truncating small singular values.
 !     See the description of NRNK.  
 !.....
 !     K (output) INTEGER,  0 <= K <= N 
 !     The dimension of the SVD/POD basis for the leading N-1
 !     data snapshots (columns of F) and the number of the 
 !     computed Ritz pairs. The value of K is determined
 !     according to the rule set by the parameters NRNK and 
 !     TOL. See the descriptions of NRNK and TOL. 
 !.....
 !     REIG (output) REAL(KIND=WP) (N-1)-by-1 array
 !     The leading K (K<=N) entries of REIG contain
 !     the real parts of the computed eigenvalues
 !     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
 !     See the descriptions of K, IMEIG, Z.
 !.....
 !     IMEIG (output) REAL(KIND=WP) (N-1)-by-1 array
 !     The leading K (K<N) entries of REIG contain
 !     the imaginary parts of the computed eigenvalues
 !     REIG(1:K) + sqrt(-1)*IMEIG(1:K).
 !     The eigenvalues are determined as follows:
 !     If IMEIG(i) == 0, then the corresponding eigenvalue is
 !     real, LAMBDA(i) = REIG(i).
 !     If IMEIG(i)>0, then the corresponding complex
 !     conjugate pair of eigenvalues reads
 !     LAMBDA(i)   = REIG(i) + sqrt(-1)*IMAG(i)
 !     LAMBDA(i+1) = REIG(i) - sqrt(-1)*IMAG(i)
 !     That is, complex conjugate pairs have consecutive
 !     indices (i,i+1), with the positive imaginary part
 !     listed first.
 !     See the descriptions of K, REIG, Z.     
 !.....      
 !     Z (workspace/output) REAL(KIND=WP)  M-by-(N-1) array
 !     If JOBZ =='V' then
 !        Z contains real Ritz vectors as follows:
 !        If IMEIG(i)=0, then Z(:,i) is an eigenvector of
 !        the i-th Ritz value.
 !        If IMEIG(i) > 0 (and IMEIG(i+1) < 0) then
 !        [Z(:,i) Z(:,i+1)] span an invariant subspace and
 !        the Ritz values extracted from this subspace are
 !        REIG(i) + sqrt(-1)*IMEIG(i) and
 !        REIG(i) - sqrt(-1)*IMEIG(i).
 !        The corresponding eigenvectors are
 !        Z(:,i) + sqrt(-1)*Z(:,i+1) and
 !        Z(:,i) - sqrt(-1)*Z(:,i+1), respectively.
 !     If JOBZ == 'F', then the above descriptions hold for
 !     the columns of Z*V, where the columns of V are the
 !     eigenvectors of the K-by-K Rayleigh quotient, and Z is
 !     orthonormal. The columns of V are similarly structured:
 !     If IMEIG(i) == 0 then Z*V(:,i) is an eigenvector, and if 
 !     IMEIG(i) > 0 then Z*V(:,i)+sqrt(-1)*Z*V(:,i+1) and
 !                       Z*V(:,i)-sqrt(-1)*Z*V(:,i+1)
 !     are the eigenvectors of LAMBDA(i), LAMBDA(i+1).
 !     See the descriptions of REIG, IMEIG, X and V.
 !.....
 !     LDZ (input) INTEGER , LDZ >= M
 !     The leading dimension of the array Z.
 !.....
 !     RES (output) REAL(KIND=WP) (N-1)-by-1 array
 !     RES(1:K) contains the residuals for the K computed 
 !     Ritz pairs.       
 !     If LAMBDA(i) is real, then
 !        RES(i) = || A * Z(:,i) - LAMBDA(i)*Z(:,i))||_2.
 !     If [LAMBDA(i), LAMBDA(i+1)] is a complex conjugate pair
 !     then
 !     RES(i)=RES(i+1) = || A * Z(:,i:i+1) - Z(:,i:i+1) *B||_F
 !     where B = [ real(LAMBDA(i)) imag(LAMBDA(i)) ]
 !               [-imag(LAMBDA(i)) real(LAMBDA(i)) ].
 !     It holds that
 !     RES(i)   = || A*ZC(:,i)   - LAMBDA(i)  *ZC(:,i)   ||_2
 !     RES(i+1) = || A*ZC(:,i+1) - LAMBDA(i+1)*ZC(:,i+1) ||_2
 !     where ZC(:,i)   =  Z(:,i) + sqrt(-1)*Z(:,i+1)
 !           ZC(:,i+1) =  Z(:,i) - sqrt(-1)*Z(:,i+1)
 !     See the description of Z.
 !.....
 !     B (output) REAL(KIND=WP)  MIN(M,N)-by-(N-1) array.
 !     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
 !     be used for computing the refined vectors; see further 
 !     details in the provided references. 
 !     If JOBF == 'E', B(1:N,1;K) contains 
 !     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
 !     Exact DMD, up to scaling by the inverse eigenvalues.   
 !     In both cases, the content of B can be lifted to the 
 !     original dimension of the input data by pre-multiplying
 !     with the Q factor from the initial QR factorization.     
 !     Here A denotes a compression of the underlying operator.      
 !     See the descriptions of F and X.
 !     If JOBF =='N', then B is not referenced.
 !.....
 !     LDB (input) INTEGER, LDB >= MIN(M,N)
 !     The leading dimension of the array B.
 !.....      
 !     V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array
 !     On exit, V(1:K,1:K) contains the K eigenvectors of
 !     the Rayleigh quotient. The eigenvectors of a complex
 !     conjugate pair of eigenvalues are returned in real form
 !     as explained in the description of Z. The Ritz vectors
 !     (returned in Z) are the product of X and V; see
 !     the descriptions of X and Z.
 !.....
 !     LDV (input) INTEGER, LDV >= N-1
 !     The leading dimension of the array V.
 !.....      
 !     S (output) REAL(KIND=WP) (N-1)-by-(N-1) array
 !     The array S(1:K,1:K) is used for the matrix Rayleigh
 !     quotient. This content is overwritten during
 !     the eigenvalue decomposition by SGEEV.
 !     See the description of K.
 !.....
 !     LDS (input) INTEGER, LDS >= N-1        
 !     The leading dimension of the array S.
 !.....
 !     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
 !     On exit, 
 !     WORK(1:MIN(M,N)) contains the scalar factors of the 
 !     elementary reflectors as returned by SGEQRF of the 
 !     M-by-N input matrix F.
 !     WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of 
 !     the input submatrix F(1:M,1:N-1).
 !     If the call to SGEDMDQ is only workspace query, then
 !     WORK(1) contains the minimal workspace length and
 !     WORK(2) is the optimal workspace length. Hence, the
 !     length of work is at least 2.
 !     See the description of LWORK.
 !.....
 !     LWORK (input) INTEGER
 !     The minimal length of the  workspace vector WORK.
 !     LWORK is calculated as follows:
 !     Let MLWQR  = N (minimal workspace for SGEQRF[M,N])
 !         MLWDMD = minimal workspace for SGEDMD (see the
 !                  description of LWORK in SGEDMD) for 
 !                  snapshots of dimensions MIN(M,N)-by-(N-1)
 !         MLWMQR = N (minimal workspace for 
 !                    SORMQR['L','N',M,N,N])
 !         MLWGQR = N (minimal workspace for SORGQR[M,N,N])
 !     Then
 !     LWORK = MAX(N+MLWQR, N+MLWDMD)
 !     is updated as follows:
 !        if   JOBZ == 'V' or JOBZ == 'F' THEN 
 !             LWORK = MAX( LWORK,MIN(M,N)+N-1 +MLWMQR )
 !        if   JOBQ == 'Q' THEN
 !             LWORK = MAX( LWORK,MIN(M,N)+N-1+MLWGQR)
 !     If on entry LWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths for both WORK and
 !     IWORK. See the descriptions of WORK and IWORK.          
 !.....
 !     IWORK (workspace/output) INTEGER LIWORK-by-1 array
 !     Workspace that is required only if WHTSVD equals
 !     2 , 3 or 4. (See the description of WHTSVD).
 !     If on entry LWORK =-1 or LIWORK=-1, then the
 !     minimal length of IWORK is computed and returned in
 !     IWORK(1). See the description of LIWORK.
 !.....
 !     LIWORK (input) INTEGER
 !     The minimal length of the workspace vector IWORK.
 !     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
 !     Let M1=MIN(M,N), N1=N-1. Then
 !     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
 !     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
 !     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
 !     If on entry LIWORK = -1, then a worskpace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths for both WORK and
 !     IWORK. See the descriptions of WORK and IWORK.
 !..... 
 !     INFO (output) INTEGER
 !     -i < 0 :: On entry, the i-th argument had an
 !               illegal value
 !        = 0 :: Successful return.
 !        = 1 :: Void input. Quick exit (M=0 or N=0).
 !        = 2 :: The SVD computation of X did not converge.
 !               Suggestion: Check the input data and/or
 !               repeat with different WHTSVD.
 !        = 3 :: The computation of the eigenvalues did not
 !               converge.
 !        = 4 :: If data scaling was requested on input and
 !               the procedure found inconsistency in the data
 !               such that for some column index i,
 !               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
 !               to zero if JOBS=='C'. The computation proceeds
 !               with original or modified data and warning
 !               flag is set with INFO=4.  
 !.............................................................
 !.............................................................
 !     Parameters
 !     ~~~~~~~~~~      
      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
 !      
 !     Local scalars      
 !     ~~~~~~~~~~~~~
      INTEGER           :: IMINWR, INFO1,  MLWDMD, MLWGQR, &
                           MLWMQR, MLWORK, MLWQR,  MINMN,  & 
                           OLWDMD, OLWGQR, OLWMQR, OLWORK, &
                           OLWQR
      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
                           WNTVCQ, WNTREF, WNTEX
      CHARACTER(LEN=1)  :: JOBVL
 !      
 !     Local array      
 !     ~~~~~~~~~~~      
      REAL(KIND=WP) :: RDUMMY(2)
 !      
 !     External functions (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~
      LOGICAL       LSAME
      EXTERNAL      LSAME 
 !
 !     External subroutines (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~~~~
      EXTERNAL      SGEMM 
      EXTERNAL      SGEQRF, SLACPY, SLASET, SORGQR, & 
                    SORMQR, XERBLA

 !     External subroutines
 !     ~~~~~~~~~~~~~~~~~~~~
      EXTERNAL      SGEDMD 
      
 !     Intrinsic functions
 !     ~~~~~~~~~~~~~~~~~~~
      INTRINSIC      MAX, MIN, INT         
 !..........................................................  
 !
 !    Test the input arguments    
      WNTRES = LSAME(JOBR,'R')
      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
      SCCOLY = LSAME(JOBS,'Y')
      WNTVEC = LSAME(JOBZ,'V') 
      WNTVCF = LSAME(JOBZ,'F')
      WNTVCQ = LSAME(JOBZ,'Q')
      WNTREF = LSAME(JOBF,'R') 
      WNTEX  = LSAME(JOBF,'E')
      WANTQ  = LSAME(JOBQ,'Q')
      WNTTRF = LSAME(JOBT,'R')     
      MINMN  = MIN(M,N)
      INFO = 0 
      LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) )
 !       
      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. LSAME(JOBS,'N')) )  THEN 
          INFO = -1
      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ       &
                              .OR. LSAME(JOBZ,'N')) ) THEN
          INFO = -2
      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
          INFO = -3
      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
          INFO = -4                 
      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
          INFO = -5
      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
                LSAME(JOBF,'N') ) )                    THEN
          INFO = -6    
      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
          INFO = -7
      ELSE IF ( M < 0 ) THEN
          INFO = -8
      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
          INFO = -9
      ELSE IF ( LDF < M ) THEN
          INFO = -11
      ELSE IF ( LDX < MINMN ) THEN
          INFO = -13
      ELSE IF ( LDY < MINMN ) THEN
          INFO = -15
      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
          INFO = -16
      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
          INFO = -17
      ELSE IF ( LDZ < M ) THEN
          INFO = -22
      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
          INFO = -25
      ELSE IF ( LDV < N-1 ) THEN
          INFO = -27
      ELSE IF ( LDS < N-1 ) THEN
          INFO = -29
      END IF
 !      
      IF ( WNTVEC .OR. WNTVCF ) THEN
          JOBVL = 'V'
      ELSE
          JOBVL = 'N'
      END IF     
      IF ( INFO == 0 ) THEN  
          ! Compute the minimal and the optimal workspace
          ! requirements. Simulate running the code and 
          ! determine minimal and optimal sizes of the 
          ! workspace at any moment of the run.         
         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
             ! All output except K is void. INFO=1 signals
             ! the void input. In case of a workspace query,
             ! the minimal workspace lengths are returned.
            IF ( LQUERY ) THEN  
               IWORK(1) = 1
                WORK(1) = 2
                WORK(2) = 2
            ELSE                
               K = 0
            END IF             
            INFO = 1  
            RETURN
         END IF     
         MLWQR  = MAX(1,N)  ! Minimal workspace length for SGEQRF.
         MLWORK = MIN(M,N) + MLWQR 
         IF ( LQUERY ) THEN 
             CALL SGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, &
                          INFO1 )
             OLWQR = INT(RDUMMY(1))
             OLWORK = MIN(M,N) + OLWQR           
         END IF
         CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
                      REIG, IMEIG, Z, LDZ, RES,  B, LDB,     & 
                      V, LDV, S, LDS, WORK, -1, IWORK,       &
                      LIWORK, INFO1 )
         MLWDMD = INT(WORK(1))
         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
         IMINWR = IWORK(1)
         IF ( LQUERY ) THEN 
             OLWDMD = INT(WORK(2))
             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
         END IF
         IF ( WNTVEC .OR. WNTVCF ) THEN
            MLWMQR = MAX(1,N) 
            MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR)
            IF ( LQUERY ) THEN
               CALL SORMQR( 'L','N', M, N, MINMN, F, LDF,  & 
                            WORK, Z, LDZ, WORK, -1, INFO1 )
               OLWMQR = INT(WORK(1))
               OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR)
            END IF
         END IF  
         IF ( WANTQ ) THEN
            MLWGQR = N
            MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR)
            IF ( LQUERY ) THEN 
                CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, &
                             WORK, -1, INFO1 )        
                OLWGQR = INT(WORK(1))
                OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR)
            END IF            
         END IF   
         IMINWR = MAX( 1, IMINWR )
         MLWORK = MAX( 2, MLWORK )      
         IF (  LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -31
         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -33
      END IF  
      IF( INFO /= 0 ) THEN
         CALL XERBLA( 'SGEDMDQ', -INFO )
         RETURN
      ELSE IF ( LQUERY ) THEN
 !     Return minimal and optimal workspace sizes
          IWORK(1) = IMINWR
          WORK(1)  = MLWORK
          WORK(2)  = OLWORK
          RETURN
      END IF   
 !.....	  
 !     Initial QR factorization that is used to represent the
 !     snapshots as elements of lower dimensional subspace.
 !     For large scale computation with M >>N , at this place 
 !     one can use an out of core QRF.
 !   
      CALL SGEQRF( M, N, F, LDF, WORK,         & 
                   WORK(MINMN+1), LWORK-MINMN, INFO1 )
 !      
 !     Define X and Y as the snapshots representations in the
 !     orthogonal basis computed in the QR factorization.
 !     X corresponds to the leading N-1 and Y to the trailing
 !     N-1 snapshots.
      CALL SLASET( 'L', MINMN, N-1, ZERO,  ZERO, X, LDX )
      CALL SLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
      CALL SLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
      IF ( M >= 3 ) THEN
          CALL SLASET( 'L', MINMN-2, N-2, ZERO,  ZERO, &
                       Y(3,1), LDY )  
      END IF
 !
 !     Compute the DMD of the projected snapshot pairs (X,Y)   
      CALL SGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,  &
                   N-1, X, LDX, Y, LDY, NRNK,   TOL, K,     &
                   REIG, IMEIG, Z, LDZ, RES, B, LDB, V,     &
                   LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, IWORK,  & 
                   LIWORK, INFO1 )
      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
          ! Return with error code.
          INFO = INFO1
          RETURN
      ELSE
          INFO = INFO1
      END IF    
 !      
 !     The Ritz vectors (Koopman modes) can be explicitly 
 !     formed or returned in factored form.
      IF ( WNTVEC ) THEN
        ! Compute the eigenvectors explicitly.  
        IF ( M > MINMN ) CALL SLASET( 'A', M-MINMN, K, ZERO, &
                                     ZERO, Z(MINMN+1,1), LDZ )
        CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
                     LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
      ELSE IF ( WNTVCF ) THEN   
        !   Return the Ritz vectors (eigenvectors) in factored
        !   form Z*V, where Z contains orthonormal matrix (the
        !   product of Q from the initial QR factorization and 
        !   the SVD/POD_basis returned by SGEDMD in X) and the 
        !   second factor (the eigenvectors of the Rayleigh 
        !   quotient) is in the array V, as returned by SGEDMD.
        CALL SLACPY( 'A', N, K, X, LDX, Z, LDZ )
        IF ( M > N ) CALL SLASET( 'A', M-N, K, ZERO, ZERO,   & 
                                  Z(N+1,1), LDZ )
        CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z,  &
             LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )
      END IF
 !     
 !     Some optional output variables:
 !
 !     The upper triangular factor in the initial QR 
 !     factorization is optionally returned in the array Y.
 !     This is useful if this call to SGEDMDQ is to be 
 !     followed by a streaming DMD that is implemented in a 
 !     QR compressed form.
      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
         CALL SLASET( 'A', MINMN, N, ZERO,  ZERO, Y, LDY )
         CALL SLACPY( 'U', MINMN, N, F, LDF,      Y, LDY )
      END IF    
 !
 !     The orthonormal/orthogonal factor in the initial QR 
 !     factorization is optionally returned in the array F. 
 !     Same as with the triangular factor above, this is 
 !     useful in a streaming DMD.
      IF ( WANTQ ) THEN  ! Q overwrites F 
         CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, &
              WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 )  
      END IF
 !      
      RETURN
 !      
      END SUBROUTINE SGEDMDQ
    
--- a/lapack-netlib/SRC/zgedmd.f90
+++ b/lapack-netlib/SRC/zgedmd.f90
@@ -0,0 +1,996 @@
      SUBROUTINE ZGEDMD( JOBS, JOBZ, JOBR, JOBF,  WHTSVD,   &
                         M, N, X, LDX, Y, LDY, NRNK, TOL,   &
                         K, EIGS, Z, LDZ, RES, B,    LDB,   &
                         W, LDW,  S, LDS, ZWORK,  LZWORK,   &
                         RWORK, LRWORK, IWORK, LIWORK, INFO )
 ! March 2023
 !.....
      USE                   iso_fortran_env
      IMPLICIT NONE
      INTEGER, PARAMETER :: WP = real64

 !.....
 !     Scalar arguments
      CHARACTER, INTENT(IN)   :: JOBS,   JOBZ,  JOBR,  JOBF
      INTEGER,   INTENT(IN)   :: WHTSVD, M, N,   LDX,  LDY, &
                                 NRNK, LDZ, LDB, LDW,  LDS, &
                                 LIWORK, LRWORK, LZWORK
      INTEGER,       INTENT(OUT)  :: K, INFO
      REAL(KIND=WP), INTENT(IN)   ::    TOL
 !     Array arguments
      COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: Z(LDZ,*), B(LDB,*), &
                                         W(LDW,*), S(LDS,*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
      REAL(KIND=WP),    INTENT(OUT)   :: RES(*)
      REAL(KIND=WP),    INTENT(OUT)   :: RWORK(*)
      INTEGER,          INTENT(OUT)   :: IWORK(*)
 !............................................................
 !     Purpose
 !     =======
 !     ZGEDMD computes the Dynamic Mode Decomposition (DMD) for
 !     a pair of data snapshot matrices. For the input matrices
 !     X and Y such that Y = A*X with an unaccessible matrix
 !     A, ZGEDMD computes a certain number of Ritz pairs of A using
 !     the standard Rayleigh-Ritz extraction from a subspace of
 !     range(X) that is determined using the leading left singular
 !     vectors of X. Optionally, ZGEDMD returns the residuals
 !     of the computed Ritz pairs, the information needed for
 !     a refinement of the Ritz vectors, or the eigenvectors of
 !     the Exact DMD.
 !     For further details see the references listed
 !     below. For more details of the implementation see [3].
 !
 !     References
 !     ==========
 !     [1] P. Schmid: Dynamic mode decomposition of numerical
 !         and experimental data,
 !         Journal of Fluid Mechanics 656, 5-28, 2010.
 !     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
 !         decompositions: analysis and enhancements,
 !         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
 !     [3] Z. Drmac: A LAPACK implementation of the Dynamic
 !         Mode Decomposition I. Technical report. AIMDyn Inc.
 !         and LAPACK Working Note 298.
 !     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L.
 !         Brunton, N. Kutz: On Dynamic Mode Decomposition:
 !         Theory and Applications, Journal of Computational
 !         Dynamics 1(2), 391 -421, 2014.
 !
 !......................................................................
 !     Developed and supported by:
 !     ===========================
 !     Developed and coded by Zlatko Drmac, Faculty of Science,
 !     University of Zagreb;  drmac@math.hr
 !     In cooperation with
 !     AIMdyn Inc., Santa Barbara, CA.
 !     and supported by
 !     - DARPA SBIR project "Koopman Operator-Based Forecasting
 !     for Nonstationary Processes from Near-Term, Limited
 !     Observational Data" Contract No: W31P4Q-21-C-0007
 !     - DARPA PAI project "Physics-Informed Machine Learning
 !     Methodologies" Contract No: HR0011-18-9-0033
 !     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
 !     Framework for Space-Time Analysis of Process Dynamics"
 !     Contract No: HR0011-16-C-0116
 !     Any opinions, findings and conclusions or recommendations
 !     expressed in this material are those of the author and
 !     do not necessarily reflect the views of the DARPA SBIR
 !     Program Office
 !============================================================
 !     Distribution Statement A:
 !     Approved for Public Release, Distribution Unlimited.
 !     Cleared by DARPA on September 29, 2022
 !============================================================
 !............................................................
 !     Arguments
 !     =========
 !     JOBS (input) CHARACTER*1
 !     Determines whether the initial data snapshots are scaled
 !     by a diagonal matrix.
 !     'S' :: The data snapshots matrices X and Y are multiplied
 !            with a diagonal matrix D so that X*D has unit
 !            nonzero columns (in the Euclidean 2-norm)
 !     'C' :: The snapshots are scaled as with the 'S' option.
 !            If it is found that an i-th column of X is zero
 !            vector and the corresponding i-th column of Y is
 !            non-zero, then the i-th column of Y is set to
 !            zero and a warning flag is raised.
 !     'Y' :: The data snapshots matrices X and Y are multiplied
 !            by a diagonal matrix D so that Y*D has unit
 !            nonzero columns (in the Euclidean 2-norm)
 !     'N' :: No data scaling.
 !.....
 !     JOBZ (input) CHARACTER*1
 !     Determines whether the eigenvectors (Koopman modes) will
 !     be computed.
 !     'V' :: The eigenvectors (Koopman modes) will be computed
 !            and returned in the matrix Z.
 !            See the description of Z.
 !     'F' :: The eigenvectors (Koopman modes) will be returned
 !            in factored form as the product X(:,1:K)*W, where X
 !            contains a POD basis (leading left singular vectors
 !            of the data matrix X) and W contains the eigenvectors
 !            of the corresponding Rayleigh quotient.
 !            See the descriptions of K, X, W, Z.
 !     'N' :: The eigenvectors are not computed.
 !.....
 !     JOBR (input) CHARACTER*1
 !     Determines whether to compute the residuals.
 !     'R' :: The residuals for the computed eigenpairs will be
 !            computed and stored in the array RES.
 !            See the description of RES.
 !            For this option to be legal, JOBZ must be 'V'.
 !     'N' :: The residuals are not computed.
 !.....
 !     JOBF (input) CHARACTER*1
 !     Specifies whether to store information needed for post-
 !     processing (e.g. computing refined Ritz vectors)
 !     'R' :: The matrix needed for the refinement of the Ritz
 !            vectors is computed and stored in the array B.
 !            See the description of B.
 !     'E' :: The unscaled eigenvectors of the Exact DMD are
 !            computed and returned in the array B. See the
 !            description of B.
 !     'N' :: No eigenvector refinement data is computed.
 !.....
 !     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
 !     Allows for a selection of the SVD algorithm from the
 !     LAPACK library.
 !     1 :: ZGESVD (the QR SVD algorithm)
 !     2 :: ZGESDD (the Divide and Conquer algorithm; if enough
 !          workspace available, this is the fastest option)
 !     3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4
 !          are the most accurate options)
 !     4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3
 !          are the most accurate options)
 !     For the four methods above, a significant difference in
 !     the accuracy of small singular values is possible if
 !     the snapshots vary in norm so that X is severely
 !     ill-conditioned. If small (smaller than EPS*||X||)
 !     singular values are of interest and JOBS=='N',  then
 !     the options (3, 4) give the most accurate results, where
 !     the option 4 is slightly better and with stronger
 !     theoretical background.
 !     If JOBS=='S', i.e. the columns of X will be normalized,
 !     then all methods give nearly equally accurate results.
 !.....
 !     M (input) INTEGER, M>= 0
 !     The state space dimension (the row dimension of X, Y).
 !.....
 !     N (input) INTEGER, 0 <= N <= M
 !     The number of data snapshot pairs
 !     (the number of columns of X and Y).
 !.....
 !     X (input/output) COMPLEX(KIND=WP) M-by-N array
 !   > On entry, X contains the data snapshot matrix X. It is
 !     assumed that the column norms of X are in the range of
 !     the normalized floating point numbers.
 !   < On exit, the leading K columns of X contain a POD basis,
 !     i.e. the leading K left singular vectors of the input
 !     data matrix X, U(:,1:K). All N columns of X contain all
 !     left singular vectors of the input matrix X.
 !     See the descriptions of K, Z and W.
 !.....
 !     LDX (input) INTEGER, LDX >= M
 !     The leading dimension of the array X.
 !.....
 !     Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array
 !   > On entry, Y contains the data snapshot matrix Y
 !   < On exit,
 !     If JOBR == 'R', the leading K columns of Y  contain
 !     the residual vectors for the computed Ritz pairs.
 !     See the description of RES.
 !     If JOBR == 'N', Y contains the original input data,
 !                     scaled according to the value of JOBS.
 !.....
 !     LDY (input) INTEGER , LDY >= M
 !     The leading dimension of the array Y.
 !.....
 !     NRNK (input) INTEGER
 !     Determines the mode how to compute the numerical rank,
 !     i.e. how to truncate small singular values of the input
 !     matrix X. On input, if
 !     NRNK = -1 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(1)
 !                  This option is recommended.
 !     NRNK = -2 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(i-1)
 !                  This option is included for R&D purposes.
 !                  It requires highly accurate SVD, which
 !                  may not be feasible.
 !     The numerical rank can be enforced by using positive
 !     value of NRNK as follows:
 !     0 < NRNK <= N :: at most NRNK largest singular values
 !     will be used. If the number of the computed nonzero
 !     singular values is less than NRNK, then only those
 !     nonzero values will be used and the actually used
 !     dimension is less than NRNK. The actual number of
 !     the nonzero singular values is returned in the variable
 !     K. See the descriptions of TOL and  K.
 !.....
 !     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
 !     The tolerance for truncating small singular values.
 !     See the description of NRNK.
 !.....
 !     K (output) INTEGER,  0 <= K <= N
 !     The dimension of the POD basis for the data snapshot
 !     matrix X and the number of the computed Ritz pairs.
 !     The value of K is determined according to the rule set
 !     by the parameters NRNK and TOL.
 !     See the descriptions of NRNK and TOL.
 !.....
 !     EIGS (output) COMPLEX(KIND=WP) N-by-1 array
 !     The leading K (K<=N) entries of EIGS contain
 !     the computed eigenvalues (Ritz values).
 !     See the descriptions of K, and Z.
 !.....
 !     Z (workspace/output) COMPLEX(KIND=WP)  M-by-N array
 !     If JOBZ =='V' then Z contains the  Ritz vectors.  Z(:,i)
 !     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
 !     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
 !     the columns of X(:,1:K)*W(1:K,1:K), i.e. X(:,1:K)*W(:,i)
 !     is an eigenvector corresponding to EIGS(i). The columns
 !     of W(1:k,1:K) are the computed eigenvectors of the
 !     K-by-K Rayleigh quotient.
 !     See the descriptions of EIGS, X and W.
 !.....
 !     LDZ (input) INTEGER , LDZ >= M
 !     The leading dimension of the array Z.
 !.....
 !     RES (output) REAL(KIND=WP) N-by-1 array
 !     RES(1:K) contains the residuals for the K computed
 !     Ritz pairs,
 !     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
 !     See the description of EIGS and Z.
 !.....
 !     B (output) COMPLEX(KIND=WP)  M-by-N array.
 !     IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can
 !     be used for computing the refined vectors; see further
 !     details in the provided references.
 !     If JOBF == 'E', B(1:M,1:K) contains
 !     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
 !     Exact DMD, up to scaling by the inverse eigenvalues.
 !     If JOBF =='N', then B is not referenced.
 !     See the descriptions of X, W, K.
 !.....
 !     LDB (input) INTEGER, LDB >= M
 !     The leading dimension of the array B.
 !.....
 !     W (workspace/output) COMPLEX(KIND=WP) N-by-N array
 !     On exit, W(1:K,1:K) contains the K computed
 !     eigenvectors of the matrix Rayleigh quotient.
 !     The Ritz vectors (returned in Z) are the
 !     product of X (containing a POD basis for the input
 !     matrix X) and W. See the descriptions of K, S, X and Z.
 !     W is also used as a workspace to temporarily store the
 !     right singular vectors of X.
 !.....
 !     LDW (input) INTEGER, LDW >= N
 !     The leading dimension of the array W.
 !.....
 !     S (workspace/output) COMPLEX(KIND=WP) N-by-N array
 !     The array S(1:K,1:K) is used for the matrix Rayleigh
 !     quotient. This content is overwritten during
 !     the eigenvalue decomposition by ZGEEV.
 !     See the description of K.
 !.....
 !     LDS (input) INTEGER, LDS >= N
 !     The leading dimension of the array S.
 !.....
 !     ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array
 !     ZWORK is used as complex workspace in the complex SVD, as
 !     specified by WHTSVD (1,2, 3 or 4) and for ZGEEV for computing
 !     the eigenvalues of a Rayleigh quotient.
 !     If the call to ZGEDMD is only workspace query, then
 !     ZWORK(1) contains the minimal complex workspace length and
 !     ZWORK(2) is the optimal complex workspace length.
 !     Hence, the length of work is at least 2.
 !     See the description of LZWORK.
 !.....
 !     LZWORK (input) INTEGER
 !     The minimal length of the workspace vector ZWORK.
 !     LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_ZGEEV),
 !     where LZWORK_ZGEEV = MAX( 1, 2*N )  and the minimal
 !     LZWORK_SVD is calculated as follows
 !     If WHTSVD == 1 :: ZGESVD ::
 !        LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N))
 !     If WHTSVD == 2 :: ZGESDD ::
 !        LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
 !     If WHTSVD == 3 :: ZGESVDQ ::
 !        LZWORK_SVD = obtainable by a query
 !     If WHTSVD == 4 :: ZGEJSV ::
 !        LZWORK_SVD = obtainable by a query
 !     If on entry LZWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths and returns them in
 !     LZWORK(1) and LZWORK(2), respectively.
 !.....
 !     RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array
 !     On exit, RWORK(1:N) contains the singular values of
 !     X (for JOBS=='N') or column scaled X (JOBS=='S', 'C').
 !     If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain
 !     scaling factor RWORK(N+2)/RWORK(N+1) used to scale X
 !     and Y to avoid overflow in the SVD of X.
 !     This may be of interest if the scaling option is off
 !     and as many as possible smallest eigenvalues are
 !     desired to the highest feasible accuracy.
 !     If the call to ZGEDMD is only workspace query, then
 !     RWORK(1) contains the minimal workspace length.
 !     See the description of LRWORK.
 !.....
 !     LRWORK (input) INTEGER
 !     The minimal length of the workspace vector RWORK.
 !     LRWORK is calculated as follows:
 !     LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_ZGEEV), where
 !     LRWORK_ZGEEV = MAX(1,2*N) and RWORK_SVD is the real workspace
 !     for the SVD subroutine determined by the input parameter
 !     WHTSVD.
 !     If WHTSVD == 1 :: ZGESVD ::
 !        LRWORK_SVD = 5*MIN(M,N)
 !     If WHTSVD == 2 :: ZGESDD ::
 !        LRWORK_SVD =  MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N),
 !        2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
 !     If WHTSVD == 3 :: ZGESVDQ ::
 !        LRWORK_SVD = obtainable by a query
 !     If WHTSVD == 4 :: ZGEJSV ::
 !        LRWORK_SVD = obtainable by a query
 !     If on entry LRWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     real workspace length and returns it in RWORK(1).
 !.....
 !     IWORK (workspace/output) INTEGER LIWORK-by-1 array
 !     Workspace that is required only if WHTSVD equals
 !     2 , 3 or 4. (See the description of WHTSVD).
 !     If on entry LWORK =-1 or LIWORK=-1, then the
 !     minimal length of IWORK is computed and returned in
 !     IWORK(1). See the description of LIWORK.
 !.....
 !     LIWORK (input) INTEGER
 !     The minimal length of the workspace vector IWORK.
 !     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
 !     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N))
 !     If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1)
 !     If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N)
 !     If on entry LIWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths for  ZWORK, RWORK and
 !     IWORK. See the descriptions of ZWORK, RWORK and IWORK.
 !.....
 !     INFO (output) INTEGER
 !     -i < 0 :: On entry, the i-th argument had an
 !               illegal value
 !        = 0 :: Successful return.
 !        = 1 :: Void input. Quick exit (M=0 or N=0).
 !        = 2 :: The SVD computation of X did not converge.
 !               Suggestion: Check the input data and/or
 !               repeat with different WHTSVD.
 !        = 3 :: The computation of the eigenvalues did not
 !               converge.
 !        = 4 :: If data scaling was requested on input and
 !               the procedure found inconsistency in the data
 !               such that for some column index i,
 !               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
 !               to zero if JOBS=='C'. The computation proceeds
 !               with original or modified data and warning
 !               flag is set with INFO=4.
 !.............................................................
 !.............................................................
 !     Parameters
 !     ~~~~~~~~~~
      REAL(KIND=WP),    PARAMETER ::  ONE = 1.0_WP
      REAL(KIND=WP),    PARAMETER :: ZERO = 0.0_WP
      COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )

 !     Local scalars
 !     ~~~~~~~~~~~~~
      REAL(KIND=WP) :: OFL,   ROOTSC, SCALE,  SMALL,    &
                       SSUM,  XSCL1,  XSCL2
      INTEGER       ::  i,  j,  IMINWR,  INFO1, INFO2,  &
                        LWRKEV, LWRSDD, LWRSVD, LWRSVJ, &
                        LWRSVQ, MLWORK, MWRKEV, MWRSDD, &
                        MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, &
                        OLWORK, MLRWRK
      LOGICAL       ::  BADXY, LQUERY, SCCOLX, SCCOLY,  &
                        WNTEX, WNTREF, WNTRES, WNTVEC
      CHARACTER     ::  JOBZL, T_OR_N
      CHARACTER     ::  JSVOPT
 !
 !     Local arrays
 !     ~~~~~~~~~~~~
      REAL(KIND=WP) :: RDUMMY(2)

 !     External functions (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~
      REAL(KIND=WP) ZLANGE, DLAMCH, DZNRM2
      EXTERNAL      ZLANGE, DLAMCH, DZNRM2, IZAMAX
      INTEGER                               IZAMAX
      LOGICAL       DISNAN, LSAME
      EXTERNAL      DISNAN, LSAME

 !     External subroutines (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~~~~
      EXTERNAL      ZAXPY,  ZGEMM,  ZDSCAL
      EXTERNAL      ZGEEV,  ZGEJSV, ZGESDD, ZGESVD, ZGESVDQ, &
                    ZLACPY, ZLASCL, ZLASSQ, XERBLA

 !     Intrinsic functions
 !     ~~~~~~~~~~~~~~~~~~~
      INTRINSIC     DBLE, INT, MAX, SQRT
 !............................................................
 !
 !    Test the input arguments
 !
      WNTRES = LSAME(JOBR,'R')
      SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C')
      SCCOLY = LSAME(JOBS,'Y')
      WNTVEC = LSAME(JOBZ,'V')
      WNTREF = LSAME(JOBF,'R')
      WNTEX  = LSAME(JOBF,'E')
      INFO   = 0
      LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) &
                                  .OR. ( LRWORK == -1 ) )
 !
      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. &
                                  LSAME(JOBS,'N')) )   THEN
          INFO = -1
      ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N')        &
                              .OR. LSAME(JOBZ,'F')) )  THEN
          INFO = -2
      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.  &
                ( WNTRES .AND. (.NOT.WNTVEC) ) )       THEN
          INFO = -3
      ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             &
                LSAME(JOBF,'N') ) )                    THEN
          INFO = -4
      ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR.  &
                      (WHTSVD == 3) .OR. (WHTSVD == 4) )) THEN
          INFO = -5
      ELSE IF ( M < 0 )   THEN
          INFO = -6
      ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN
          INFO = -7
      ELSE IF ( LDX < M ) THEN
          INFO = -9
      ELSE IF ( LDY < M ) THEN
          INFO = -11
      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. &
                ((NRNK >= 1).AND.(NRNK <=N ))) )      THEN
          INFO = -12
      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) )  THEN
          INFO = -13
      ELSE IF ( LDZ < M ) THEN
          INFO = -17
      ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN
          INFO = -20
      ELSE IF ( LDW < N ) THEN
          INFO = -22
      ELSE IF ( LDS < N ) THEN
          INFO = -24
      END IF
 !
      IF ( INFO == 0 ) THEN
          ! Compute the minimal and the optimal workspace
          ! requirements. Simulate running the code and
          ! determine minimal and optimal sizes of the
          ! workspace at any moment of the run.
         IF ( N == 0 ) THEN
             ! Quick return. All output except K is void.
             ! INFO=1 signals the void input.
             ! In case of a workspace query, the default
             ! minimal workspace lengths are returned.
            IF ( LQUERY ) THEN
                IWORK(1) = 1
                RWORK(1) = 1
                ZWORK(1) = 2
                ZWORK(2) = 2
            ELSE
               K   =  0
            END IF
            INFO = 1
            RETURN
         END IF

         IMINWR = 1
         MLRWRK = MAX(1,N)
         MLWORK = 2
         OLWORK = 2
         SELECT CASE ( WHTSVD )
         CASE (1)
             ! The following is specified as the minimal
             ! length of WORK in the definition of ZGESVD:
             ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
             MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N))
             MLWORK = MAX(MLWORK,MWRSVD)
             MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N))
             IF ( LQUERY ) THEN
                CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, &
                     B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 )
                LWRSVD = INT( ZWORK(1) )
                OLWORK = MAX(OLWORK,LWRSVD)
             END IF
         CASE (2)
             ! The following is specified as the minimal
             ! length of WORK in the definition of ZGESDD:
             ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
             ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N)
             ! In LAPACK 3.10.1 RWORK is defined differently.
             ! Below we take max over the two versions.
             ! IMINWR = 8*MIN(M,N)
             MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N)
             MLWORK = MAX(MLWORK,MWRSDD)
             IMINWR = 8*MIN(M,N)
             MLRWRK = MAX( MLRWRK,  N +                    &
                      MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), &
                           5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), &
                           2*MAX(M,N)*MIN(M,N)+            &
                           2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) )
             IF ( LQUERY ) THEN
                CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B,LDB,&
                     W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 )
                LWRSDD = MAX( MWRSDD,INT( ZWORK(1) ))
                ! Possible bug in ZGESDD optimal workspace size.
                OLWORK = MAX(OLWORK,LWRSDD)
             END IF
         CASE (3)
             CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
                  X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK,  &
                  IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 )
             IMINWR = IWORK(1)
             MWRSVQ = INT(ZWORK(2))
             MLWORK = MAX(MLWORK,MWRSVQ)
             MLRWRK  = MAX(MLRWRK,N + INT(RDUMMY(1)))
             IF ( LQUERY ) THEN
                LWRSVQ = INT(ZWORK(1))
                OLWORK = MAX(OLWORK,LWRSVQ)
             END IF
         CASE (4)
             JSVOPT = 'J'
             CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, &
                   N, X, LDX, RWORK, Z, LDZ, W, LDW,       &
                   ZWORK, -1, RDUMMY, -1, IWORK, INFO1 )
             IMINWR = IWORK(1)
             MWRSVJ = INT(ZWORK(2))
             MLWORK = MAX(MLWORK,MWRSVJ)
             MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1))))
             IF ( LQUERY ) THEN
                LWRSVJ = INT(ZWORK(1))
                OLWORK = MAX(OLWORK,LWRSVJ)
             END IF
         END SELECT
         IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN
             JOBZL = 'V'
         ELSE
             JOBZL = 'N'
         END IF
         ! Workspace calculation to the ZGEEV call
         MWRKEV = MAX( 1, 2*N )
         MLWORK = MAX(MLWORK,MWRKEV)
         MLRWRK = MAX(MLRWRK,N+2*N)
         IF ( LQUERY ) THEN
             CALL ZGEEV( 'N', JOBZL, N, S, LDS, EIGS, &
              W, LDW, W, LDW, ZWORK, -1, RWORK, INFO1 )
                LWRKEV = INT(ZWORK(1))
                OLWORK = MAX( OLWORK, LWRKEV )
         END IF
 !
         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30
         IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -28
         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -26

      END IF
 !
      IF( INFO /= 0 ) THEN
         CALL XERBLA( 'ZGEDMD', -INFO )
         RETURN
      ELSE IF ( LQUERY ) THEN
 !     Return minimal and optimal workspace sizes
          IWORK(1) = IMINWR
          RWORK(1) = MLRWRK
          ZWORK(1) = MLWORK
          ZWORK(2) = OLWORK
          RETURN
      END IF
 !............................................................
 !
      OFL   = DLAMCH('O')
      SMALL = DLAMCH('S')
      BADXY = .FALSE.
 !
 !     <1> Optional scaling of the snapshots (columns of X, Y)
 !     ==========================================================
      IF ( SCCOLX ) THEN
          ! The columns of X will be normalized.
          ! To prevent overflows, the column norms of X are
          ! carefully computed using ZLASSQ.
          K = 0
          DO i = 1, N
            !WORK(i) = DZNRM2( M, X(1,i), 1 )
            SCALE  = ZERO
            CALL ZLASSQ( M, X(1,i), 1, SCALE, SSUM )
            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
                K    =  0
                INFO = -8
                CALL XERBLA('ZGEDMD',-INFO)
            END IF
            IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN
               ROOTSC = SQRT(SSUM)
               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
 !                 Norm of X(:,i) overflows. First, X(:,i)
 !                 is scaled by
 !                 ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2.
 !                 Next, the norm of X(:,i) is stored without
 !                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
 !                 the minus sign indicating the 1/M factor.
 !                 Scaling is performed without overflow, and
 !                 underflow may occur in the smallest entries
 !                 of X(:,i). The relative backward and forward
 !                 errors are small in the ell_2 norm.
                  CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
                               M, 1, X(1,i), LDX, INFO2 )
                  RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
               ELSE
 !                 X(:,i) will be scaled to unit 2-norm
                  RWORK(i) =   SCALE * ROOTSC
                  CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
                               X(1,i), LDX, INFO2 )      ! LAPACK CALL
 !                 X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)   ! INTRINSIC
               END IF
            ELSE
               RWORK(i) = ZERO
               K = K + 1
            END IF
          END DO
          IF ( K == N ) THEN
             ! All columns of X are zero. Return error code -8.
             ! (the 8th input variable had an illegal value)
             K = 0
             INFO = -8
             CALL XERBLA('ZGEDMD',-INFO)
             RETURN
          END IF
          DO i = 1, N
 !           Now, apply the same scaling to the columns of Y.
            IF ( RWORK(i) >  ZERO ) THEN
                CALL ZDSCAL( M, ONE/RWORK(i), Y(1,i), 1 )  ! BLAS CALL
 !               Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)       ! INTRINSIC
            ELSE IF ( RWORK(i) < ZERO ) THEN
                CALL ZLASCL( 'G', 0, 0, -RWORK(i),          &
                     ONE/DBLE(M), M, 1, Y(1,i), LDY, INFO2 ) ! LAPACK CALL
            ELSE IF ( ABS(Y(IZAMAX(M, Y(1,i),1),i ))  &
                                            /= ZERO ) THEN
 !               X(:,i) is zero vector. For consistency,
 !               Y(:,i) should also be zero. If Y(:,i) is not
 !               zero, then the data might be inconsistent or
 !               corrupted. If JOBS == 'C', Y(:,i) is set to
 !               zero and a warning flag is raised.
 !               The computation continues but the
 !               situation will be reported in the output.
                BADXY = .TRUE.
                IF ( LSAME(JOBS,'C')) &
                CALL ZDSCAL( M, ZERO, Y(1,i), 1 )  ! BLAS CALL
            END IF
          END DO
      END IF
  !
      IF ( SCCOLY ) THEN
          ! The columns of Y will be normalized.
          ! To prevent overflows, the column norms of Y are
          ! carefully computed using ZLASSQ.
          DO i = 1, N
            !RWORK(i) = DZNRM2( M, Y(1,i), 1 )
            SCALE  = ZERO
            CALL ZLASSQ( M, Y(1,i), 1, SCALE, SSUM )
            IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN
                K    =  0
                INFO = -10
                CALL XERBLA('ZGEDMD',-INFO)
            END IF
            IF ( SCALE /= ZERO  .AND. (SSUM /= ZERO) ) THEN
               ROOTSC = SQRT(SSUM)
               IF ( SCALE .GE. (OFL / ROOTSC) ) THEN
 !                 Norm of Y(:,i) overflows. First, Y(:,i)
 !                 is scaled by
 !                 ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2.
 !                 Next, the norm of Y(:,i) is stored without
 !                 overflow as RWORK(i) = - SCALE * (ROOTSC/M),
 !                 the minus sign indicating the 1/M factor.
 !                 Scaling is performed without overflow, and
 !                 underflow may occur in the smallest entries
 !                 of Y(:,i). The relative backward and forward
 !                 errors are small in the ell_2 norm.
                  CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, &
                               M, 1, Y(1,i), LDY, INFO2 )
                  RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) )
               ELSE
 !                 Y(:,i) will be scaled to unit 2-norm
                  RWORK(i) =   SCALE * ROOTSC
                  CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, &
                               Y(1,i), LDY, INFO2 )             ! LAPACK CALL
 !                 Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i)          ! INTRINSIC
               END IF
            ELSE
               RWORK(i) = ZERO
            END IF
         END DO
         DO i = 1, N
 !           Now, apply the same scaling to the columns of X.
            IF ( RWORK(i) >  ZERO ) THEN
                CALL ZDSCAL( M, ONE/RWORK(i), X(1,i), 1 ) ! BLAS CALL
 !               X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i)      ! INTRINSIC
            ELSE IF ( RWORK(i) < ZERO ) THEN
                CALL ZLASCL( 'G', 0, 0, -RWORK(i),          &
                     ONE/DBLE(M), M, 1, X(1,i), LDX, INFO2 ) ! LAPACK CALL
            ELSE IF ( ABS(X(IZAMAX(M, X(1,i),1),i ))  &
                                           /= ZERO ) THEN
 !               Y(:,i) is zero vector.  If X(:,i) is not
 !               zero, then a warning flag is raised.
 !               The computation continues but the
 !               situation will be reported in the output.
                BADXY = .TRUE.
            END IF
         END DO
       END IF
 !
 !     <2> SVD of the data snapshot matrix X.
 !     =====================================
 !     The left singular vectors are stored in the array X.
 !     The right singular vectors are in the array W.
 !     The array W will later on contain the eigenvectors
 !     of a Rayleigh quotient.
      NUMRNK = N
      SELECT CASE ( WHTSVD )
         CASE (1)
             CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, &
                  LDB, W, LDW, ZWORK, LZWORK,  RWORK(N+1), INFO1 ) ! LAPACK CALL
             T_OR_N = 'C'
         CASE (2)
            CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, &
                 LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 )   ! LAPACK CALL
            T_OR_N = 'C'
         CASE (3)
              CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, &
                   X, LDX, RWORK, Z, LDZ, W, LDW, &
                   NUMRNK, IWORK, LIWORK, ZWORK,     &
                   LZWORK, RWORK(N+1), LRWORK-N, INFO1)     ! LAPACK CALL
              CALL ZLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX )   ! LAPACK CALL
         T_OR_N = 'C'
         CASE (4)
              CALL ZGEJSV( 'F', 'U', JSVOPT, 'R', 'N', 'P', M, &
                   N, X, LDX, RWORK, Z, LDZ, W, LDW, &
                   ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 )    ! LAPACK CALL
              CALL ZLACPY( 'A', M, N, Z, LDZ, X, LDX )   ! LAPACK CALL
              T_OR_N = 'N'
              XSCL1 = RWORK(N+1)
              XSCL2 = RWORK(N+2)
              IF ( XSCL1 /=  XSCL2 ) THEN
                 ! This is an exceptional situation. If the
                 ! data matrices are not scaled and the
                 ! largest singular value of X overflows.
                 ! In that case ZGEJSV can return the SVD
                 ! in scaled form. The scaling factor can be used
                 ! to rescale the data (X and Y).
                 CALL ZLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2  )
              END IF
      END SELECT
 !
      IF ( INFO1 > 0 ) THEN
         ! The SVD selected subroutine did not converge.
         ! Return with an error code.
         INFO = 2
         RETURN
      END IF
 !
      IF ( RWORK(1) == ZERO ) THEN
          ! The largest computed singular value of (scaled)
          ! X is zero. Return error code -8
          ! (the 8th input variable had an illegal value).
          K = 0
          INFO = -8
          CALL XERBLA('ZGEDMD',-INFO)
          RETURN
      END IF
 !
      !<3> Determine the numerical rank of the data
      !    snapshots matrix X. This depends on the
      !    parameters NRNK and TOL.

      SELECT CASE ( NRNK )
          CASE ( -1 )
               K = 1
               DO i = 2, NUMRNK
                 IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. &
                      ( RWORK(i) <= SMALL ) ) EXIT
                 K = K + 1
               END DO
          CASE ( -2 )
               K = 1
               DO i = 1, NUMRNK-1
                 IF ( ( RWORK(i+1) <= RWORK(i)*TOL  ) .OR. &
                      ( RWORK(i) <= SMALL ) ) EXIT
                 K = K + 1
               END DO
          CASE DEFAULT
               K = 1
               DO i = 2, NRNK
                  IF ( RWORK(i) <= SMALL ) EXIT
                  K = K + 1
               END DO
          END SELECT
      !   Now, U = X(1:M,1:K) is the SVD/POD basis for the
      !   snapshot data in the input matrix X.

      !<4> Compute the Rayleigh quotient S = U^H * A * U.
      !    Depending on the requested outputs, the computation
      !    is organized to compute additional auxiliary
      !    matrices (for the residuals and refinements).
      !
      !    In all formulas below, we need V_k*Sigma_k^(-1)
      !    where either V_k is in W(1:N,1:K), or V_k^H is in
      !    W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)).
      IF ( LSAME(T_OR_N, 'N') ) THEN
          DO i = 1, K
           CALL ZDSCAL( N, ONE/RWORK(i), W(1,i), 1 )    ! BLAS CALL
           ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i)      ! INTRINSIC
          END DO
      ELSE
          ! This non-unit stride access is due to the fact
          ! that ZGESVD, ZGESVDQ and ZGESDD return the
          ! adjoint matrix of the right singular vectors.
          !DO i = 1, K
          ! CALL ZDSCAL( N, ONE/RWORK(i), W(i,1), LDW )    ! BLAS CALL
          ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N)      ! INTRINSIC
          !END DO
          DO i = 1, K
              RWORK(N+i) = ONE/RWORK(i)
          END DO
          DO j = 1, N
             DO i = 1, K
                 W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j)
             END DO
          END DO
      END IF
 !
      IF ( WNTREF ) THEN
         !
         ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K)))
         ! for computing the refined Ritz vectors
         ! (optionally, outside ZGEDMD).
          CALL ZGEMM( 'N', T_OR_N, M, K, N, ZONE, Y, LDY, W, &
                      LDW, ZZERO, Z, LDZ )                       ! BLAS CALL
          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C'
          ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K))                   ! INTRINSIC, for T_OR_N=='N'
          !
          ! At this point Z contains
          ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and
          ! this is needed for computing the residuals.
          ! This matrix is  returned in the array B and
          ! it can be used to compute refined Ritz vectors.
          CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )   ! BLAS CALL
          ! B(1:M,1:K) = Z(1:M,1:K)                  ! INTRINSIC

          CALL ZGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, &
                      LDZ, ZZERO, S, LDS )                        ! BLAS CALL
          ! S(1:K,1:K) = MATMUL(TRANSPOSE(CONJG(X(1:M,1:K))),Z(1:M,1:K)) ! INTRINSIC
          ! At this point S = U^H * A * U is the Rayleigh quotient.
      ELSE
        ! A * U(:,1:K) is not explicitly needed and the
        ! computation is organized differently. The Rayleigh
        ! quotient is computed more efficiently.
        CALL ZGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, &
                   ZZERO, Z, LDZ )                                         ! BLAS CALL
        ! Z(1:K,1:N) = MATMUL( TRANSPOSE(CONJG(X(1:M,1:K))), Y(1:M,1:N) )  ! INTRINSIC
        !
        CALL ZGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, &
                    LDW, ZZERO, S, LDS )                         ! BLAS CALL
        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='T'
        ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K)))                 ! INTRINSIC, for T_OR_N=='N'
        ! At this point S = U^H * A * U is the Rayleigh quotient.
        ! If the residuals are requested, save scaled V_k into Z.
        ! Recall that V_k or V_k^H is stored in W.
        IF ( WNTRES .OR. WNTEX ) THEN
          IF ( LSAME(T_OR_N, 'N') ) THEN
              CALL ZLACPY( 'A', N, K, W, LDW, Z, LDZ )
          ELSE
              CALL ZLACPY( 'A', K, N, W, LDW, Z, LDZ )
          END IF
        END IF
      END IF
 !
      !<5> Compute the Ritz values and (if requested) the
      !   right eigenvectors of the Rayleigh quotient.
      !
      CALL ZGEEV( 'N', JOBZL, K, S, LDS, EIGS, W, LDW, &
            W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 )  ! LAPACK CALL
      !
      ! W(1:K,1:K) contains the eigenvectors of the Rayleigh
      ! quotient.  See the description of Z.
      ! Also, see the description of ZGEEV.
      IF ( INFO1 > 0 ) THEN
         ! ZGEEV failed to compute the eigenvalues and
         ! eigenvectors of the Rayleigh quotient.
         INFO = 3
         RETURN
      END IF
 !
      ! <6> Compute the eigenvectors (if requested) and,
      ! the residuals (if requested).
      !
      IF ( WNTVEC .OR. WNTEX ) THEN
      IF ( WNTRES ) THEN
          IF ( WNTREF ) THEN
            ! Here, if the refinement is requested, we have
            ! A*U(:,1:K) already computed and stored in Z.
            ! For the residuals, need Y = A * U(:,1;K) * W.
            CALL ZGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, &
                       LDW, ZZERO, Y, LDY )               ! BLAS CALL
            ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K)        ! INTRINSIC
            ! This frees Z; Y contains A * U(:,1:K) * W.
          ELSE
            ! Compute S = V_k * Sigma_k^(-1) * W, where
            ! V_k * Sigma_k^(-1) (or its adjoint) is stored in Z
            CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
                       W, LDW, ZZERO, S, LDS )
            ! Then, compute Z = Y * S =
            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
            ! = A * U(:,1:K) * W(1:K,1:K)
            CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
                       LDS, ZZERO, Z, LDZ )
            ! Save a copy of Z into Y and free Z for holding
            ! the Ritz vectors.
            CALL ZLACPY( 'A', M, K, Z, LDZ, Y, LDY )
            IF ( WNTEX ) CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )
          END IF
      ELSE IF ( WNTEX ) THEN
          ! Compute S = V_k * Sigma_k^(-1) * W, where
            ! V_k * Sigma_k^(-1) is stored in Z
            CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, &
                       W, LDW, ZZERO, S, LDS )
            ! Then, compute Z = Y * S =
            ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) =
            ! = A * U(:,1:K) * W(1:K,1:K)
            CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
                       LDS, ZZERO, B, LDB )
            ! The above call replaces the following two calls
            ! that were used in the developing-testing phase.
            ! CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, &
            !           LDS, ZZERO, Z, LDZ)
            ! Save a copy of Z into B and free Z for holding
            ! the Ritz vectors.
            ! CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB )
      END IF
 !
      ! Compute the Ritz vectors
      IF ( WNTVEC ) CALL ZGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, &
                   ZZERO, Z, LDZ )                          ! BLAS CALL
      ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K))         ! INTRINSIC
 !
      IF ( WNTRES ) THEN
         DO i = 1, K
            CALL ZAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 )       ! BLAS CALL
            ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i)            ! INTRINSIC
            RES(i) = DZNRM2( M, Y(1,i), 1 )                       ! BLAS CALL
         END DO
      END IF
      END IF
 !
      IF ( WHTSVD == 4 ) THEN
          RWORK(N+1) = XSCL1
          RWORK(N+2) = XSCL2
      END IF
 !
 !     Successful exit.
      IF ( .NOT. BADXY ) THEN
         INFO = 0
      ELSE
         ! A warning on possible data inconsistency.
         ! This should be a rare event.
         INFO = 4
      END IF
 !............................................................
      RETURN
 !     ......
      END SUBROUTINE ZGEDMD

--- a/lapack-netlib/SRC/zgedmdq.f90
+++ b/lapack-netlib/SRC/zgedmdq.f90
@@ -0,0 +1,689 @@
 SUBROUTINE ZGEDMDQ( JOBS,  JOBZ, JOBR, JOBQ, JOBT, JOBF,   &
                    WHTSVD,   M, N, F, LDF,  X, LDX,  Y,   &
                    LDY,   NRNK,  TOL,   K,  EIGS,         &
                    Z, LDZ, RES,  B,     LDB,   V, LDV,    & 
                    S, LDS, ZWORK, LZWORK, WORK,  LWORK,   &
                    IWORK, LIWORK, INFO )
 ! March 2023
 !.....
      USE                   iso_fortran_env
      IMPLICIT NONE
      INTEGER, PARAMETER :: WP = real64
 !.....      
 !     Scalar arguments       
      CHARACTER, INTENT(IN)  :: JOBS, JOBZ, JOBR, JOBQ,    &
                                JOBT, JOBF
      INTEGER,   INTENT(IN)  :: WHTSVD, M, N,   LDF, LDX,  &
                                LDY, NRNK, LDZ, LDB, LDV,  &
                                LDS, LZWORK,  LWORK, LIWORK
      INTEGER,   INTENT(OUT) :: INFO,   K      
      REAL(KIND=WP), INTENT(IN)    ::   TOL     
 !     Array arguments      
      COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: X(LDX,*), Y(LDY,*), &
                                         Z(LDZ,*), B(LDB,*), &
                                         V(LDV,*), S(LDS,*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: EIGS(*)
      COMPLEX(KIND=WP), INTENT(OUT)   :: ZWORK(*)
      REAL(KIND=WP), INTENT(OUT)   :: RES(*)
      REAL(KIND=WP), INTENT(OUT)   :: WORK(*)  
      INTEGER,       INTENT(OUT)   :: IWORK(*)
 !.....      
 !     Purpose  
 !     =======
 !     ZGEDMDQ computes the Dynamic Mode Decomposition (DMD) for
 !     a pair of data snapshot matrices, using a QR factorization
 !     based compression of the data. For the input matrices
 !     X and Y such that Y = A*X with an unaccessible matrix
 !     A, ZGEDMDQ computes a certain number of Ritz pairs of A using
 !     the standard Rayleigh-Ritz extraction from a subspace of
 !     range(X) that is determined using the leading left singular 
 !     vectors of X. Optionally, ZGEDMDQ returns the residuals 
 !     of the computed Ritz pairs, the information needed for
 !     a refinement of the Ritz vectors, or the eigenvectors of
 !     the Exact DMD.
 !     For further details see the references listed
 !     below. For more details of the implementation see [3].      
 !
 !     References
 !     ==========
 !     [1] P. Schmid: Dynamic mode decomposition of numerical
 !         and experimental data,
 !         Journal of Fluid Mechanics 656, 5-28, 2010.
 !     [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal
 !         decompositions: analysis and enhancements,
 !         SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018.
 !     [3] Z. Drmac: A LAPACK implementation of the Dynamic
 !         Mode Decomposition I. Technical report. AIMDyn Inc.
 !         and LAPACK Working Note 298.      
 !     [4] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. 
 !         Brunton, N. Kutz: On Dynamic Mode Decomposition:
 !         Theory and Applications, Journal of Computational
 !         Dynamics 1(2), 391 -421, 2014.
 !
 !     Developed and supported by:
 !     ===========================
 !     Developed and coded by Zlatko Drmac, Faculty of Science,
 !     University of Zagreb;  drmac@math.hr
 !     In cooperation with
 !     AIMdyn Inc., Santa Barbara, CA.
 !     and supported by
 !     - DARPA SBIR project "Koopman Operator-Based Forecasting
 !     for Nonstationary Processes from Near-Term, Limited
 !     Observational Data" Contract No: W31P4Q-21-C-0007
 !     - DARPA PAI project "Physics-Informed Machine Learning
 !     Methodologies" Contract No: HR0011-18-9-0033
 !     - DARPA MoDyL project "A Data-Driven, Operator-Theoretic
 !     Framework for Space-Time Analysis of Process Dynamics"
 !     Contract No: HR0011-16-C-0116
 !     Any opinions, findings and conclusions or recommendations 
 !     expressed in this material are those of the author and 
 !     do not necessarily reflect the views of the DARPA SBIR 
 !     Program Office.      
 !============================================================
 !     Distribution Statement A: 
 !     Approved for Public Release, Distribution Unlimited.
 !     Cleared by DARPA on September 29, 2022
 !============================================================      
 !......................................................................      
 !     Arguments
 !     =========
 !     JOBS (input) CHARACTER*1
 !     Determines whether the initial data snapshots are scaled
 !     by a diagonal matrix. The data snapshots are the columns
 !     of F. The leading N-1 columns of F are denoted X and the
 !     trailing N-1 columns are denoted Y. 
 !     'S' :: The data snapshots matrices X and Y are multiplied
 !            with a diagonal matrix D so that X*D has unit
 !            nonzero columns (in the Euclidean 2-norm)
 !     'C' :: The snapshots are scaled as with the 'S' option.
 !            If it is found that an i-th column of X is zero
 !            vector and the corresponding i-th column of Y is
 !            non-zero, then the i-th column of Y is set to
 !            zero and a warning flag is raised.
 !     'Y' :: The data snapshots matrices X and Y are multiplied
 !            by a diagonal matrix D so that Y*D has unit
 !            nonzero columns (in the Euclidean 2-norm)    
 !     'N' :: No data scaling.   
 !.....
 !     JOBZ (input) CHARACTER*1
 !     Determines whether the eigenvectors (Koopman modes) will
 !     be computed.
 !     'V' :: The eigenvectors (Koopman modes) will be computed
 !            and returned in the matrix Z.
 !            See the description of Z.
 !     'F' :: The eigenvectors (Koopman modes) will be returned
 !            in factored form as the product Z*V, where Z
 !            is orthonormal and V contains the eigenvectors
 !            of the corresponding Rayleigh quotient.
 !            See the descriptions of F, V, Z.
 !     'Q' :: The eigenvectors (Koopman modes) will be returned
 !            in factored form as the product Q*Z, where Z
 !            contains the eigenvectors of the compression of the
 !            underlying discretized operator onto the span of
 !            the data snapshots. See the descriptions of F, V, Z.
 !            Q is from the initial QR factorization.  
 !     'N' :: The eigenvectors are not computed.  
 !.....      
 !     JOBR (input) CHARACTER*1 
 !     Determines whether to compute the residuals.
 !     'R' :: The residuals for the computed eigenpairs will
 !            be computed and stored in the array RES.
 !            See the description of RES.
 !            For this option to be legal, JOBZ must be 'V'.
 !     'N' :: The residuals are not computed.
 !.....
 !     JOBQ (input) CHARACTER*1 
 !     Specifies whether to explicitly compute and return the
 !     unitary matrix from the QR factorization.
 !     'Q' :: The matrix Q of the QR factorization of the data
 !            snapshot matrix is computed and stored in the
 !            array F. See the description of F.       
 !     'N' :: The matrix Q is not explicitly computed.
 !.....
 !     JOBT (input) CHARACTER*1 
 !     Specifies whether to return the upper triangular factor
 !     from the QR factorization.
 !     'R' :: The matrix R of the QR factorization of the data 
 !            snapshot matrix F is returned in the array Y.
 !            See the description of Y and Further details.       
 !     'N' :: The matrix R is not returned. 
 !.....
 !     JOBF (input) CHARACTER*1
 !     Specifies whether to store information needed for post-
 !     processing (e.g. computing refined Ritz vectors)
 !     'R' :: The matrix needed for the refinement of the Ritz
 !            vectors is computed and stored in the array B.
 !            See the description of B.
 !     'E' :: The unscaled eigenvectors of the Exact DMD are 
 !            computed and returned in the array B. See the
 !            description of B.
 !     'N' :: No eigenvector refinement data is computed.   
 !     To be useful on exit, this option needs JOBQ='Q'.    
 !.....
 !     WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 }
 !     Allows for a selection of the SVD algorithm from the
 !     LAPACK library.
 !     1 :: ZGESVD (the QR SVD algorithm)
 !     2 :: ZGESDD (the Divide and Conquer algorithm; if enough
 !          workspace available, this is the fastest option)
 !     3 :: ZGESVDQ (the preconditioned QR SVD  ; this and 4
 !          are the most accurate options)
 !     4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3
 !          are the most accurate options)
 !     For the four methods above, a significant difference in
 !     the accuracy of small singular values is possible if
 !     the snapshots vary in norm so that X is severely
 !     ill-conditioned. If small (smaller than EPS*||X||)
 !     singular values are of interest and JOBS=='N',  then
 !     the options (3, 4) give the most accurate results, where
 !     the option 4 is slightly better and with stronger 
 !     theoretical background.
 !     If JOBS=='S', i.e. the columns of X will be normalized,
 !     then all methods give nearly equally accurate results.
 !.....
 !     M (input) INTEGER, M >= 0 
 !     The state space dimension (the number of rows of F).
 !.....      
 !     N (input) INTEGER, 0 <= N <= M
 !     The number of data snapshots from a single trajectory,
 !     taken at equidistant discrete times. This is the 
 !     number of columns of F.
 !.....
 !     F (input/output) COMPLEX(KIND=WP) M-by-N array
 !     > On entry,
 !     the columns of F are the sequence of data snapshots 
 !     from a single trajectory, taken at equidistant discrete
 !     times. It is assumed that the column norms of F are 
 !     in the range of the normalized floating point numbers. 
 !     < On exit,
 !     If JOBQ == 'Q', the array F contains the orthogonal 
 !     matrix/factor of the QR factorization of the initial 
 !     data snapshots matrix F. See the description of JOBQ. 
 !     If JOBQ == 'N', the entries in F strictly below the main
 !     diagonal contain, column-wise, the information on the 
 !     Householder vectors, as returned by ZGEQRF. The 
 !     remaining information to restore the orthogonal matrix
 !     of the initial QR factorization is stored in ZWORK(1:MIN(M,N)). 
 !     See the description of ZWORK.
 !.....
 !     LDF (input) INTEGER, LDF >= M 
 !     The leading dimension of the array F.
 !.....
 !     X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array
 !     X is used as workspace to hold representations of the
 !     leading N-1 snapshots in the orthonormal basis computed
 !     in the QR factorization of F.
 !     On exit, the leading K columns of X contain the leading
 !     K left singular vectors of the above described content
 !     of X. To lift them to the space of the left singular
 !     vectors U(:,1:K) of the input data, pre-multiply with the 
 !     Q factor from the initial QR factorization. 
 !     See the descriptions of F, K, V  and Z.
 !.....      
 !     LDX (input) INTEGER, LDX >= N  
 !     The leading dimension of the array X. 
 !.....
 !     Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N) array
 !     Y is used as workspace to hold representations of the
 !     trailing N-1 snapshots in the orthonormal basis computed
 !     in the QR factorization of F.
 !     On exit, 
 !     If JOBT == 'R', Y contains the MIN(M,N)-by-N upper
 !     triangular factor from the QR factorization of the data
 !     snapshot matrix F.
 !.....      
 !     LDY (input) INTEGER , LDY >= N
 !     The leading dimension of the array Y.   
 !.....
 !     NRNK (input) INTEGER
 !     Determines the mode how to compute the numerical rank,
 !     i.e. how to truncate small singular values of the input
 !     matrix X. On input, if
 !     NRNK = -1 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(1)
 !                  This option is recommended.  
 !     NRNK = -2 :: i-th singular value sigma(i) is truncated
 !                  if sigma(i) <= TOL*sigma(i-1)
 !                  This option is included for R&D purposes.
 !                  It requires highly accurate SVD, which
 !                  may not be feasible.      
 !     The numerical rank can be enforced by using positive 
 !     value of NRNK as follows: 
 !     0 < NRNK <= N-1 :: at most NRNK largest singular values
 !     will be used. If the number of the computed nonzero
 !     singular values is less than NRNK, then only those
 !     nonzero values will be used and the actually used
 !     dimension is less than NRNK. The actual number of
 !     the nonzero singular values is returned in the variable
 !     K. See the description of K.
 !.....
 !     TOL (input) REAL(KIND=WP), 0 <= TOL < 1
 !     The tolerance for truncating small singular values.
 !     See the description of NRNK.  
 !.....
 !     K (output) INTEGER,  0 <= K <= N 
 !     The dimension of the SVD/POD basis for the leading N-1
 !     data snapshots (columns of F) and the number of the 
 !     computed Ritz pairs. The value of K is determined
 !     according to the rule set by the parameters NRNK and 
 !     TOL. See the descriptions of NRNK and TOL. 
 !.....
 !     EIGS (output) COMPLEX(KIND=WP) (N-1)-by-1 array
 !     The leading K (K<=N-1) entries of EIGS contain
 !     the computed eigenvalues (Ritz values).
 !     See the descriptions of K, and Z.
 !.....
 !     Z (workspace/output) COMPLEX(KIND=WP)  M-by-(N-1) array
 !     If JOBZ =='V' then Z contains the Ritz vectors. Z(:,i)
 !     is an eigenvector of the i-th Ritz value; ||Z(:,i)||_2=1.
 !     If JOBZ == 'F', then the Z(:,i)'s are given implicitly as
 !     Z*V, where Z contains orthonormal matrix (the product of
 !     Q from the initial QR factorization and the SVD/POD_basis
 !     returned by ZGEDMD in X) and the second factor (the 
 !     eigenvectors of the Rayleigh quotient) is in the array V, 
 !     as returned by ZGEDMD. That is,  X(:,1:K)*V(:,i)
 !     is an eigenvector corresponding to EIGS(i). The columns 
 !     of V(1:K,1:K) are the computed eigenvectors of the 
 !     K-by-K Rayleigh quotient.  
 !     See the descriptions of EIGS, X and V.      
 !.....
 !     LDZ (input) INTEGER , LDZ >= M
 !     The leading dimension of the array Z.
 !.....
 !     RES (output) REAL(KIND=WP) (N-1)-by-1 array
 !     RES(1:K) contains the residuals for the K computed 
 !     Ritz pairs, 
 !     RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2.
 !     See the description of EIGS and Z.      
 !.....
 !     B (output) COMPLEX(KIND=WP)  MIN(M,N)-by-(N-1) array.
 !     IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can
 !     be used for computing the refined vectors; see further 
 !     details in the provided references. 
 !     If JOBF == 'E', B(1:N,1;K) contains 
 !     A*U(:,1:K)*W(1:K,1:K), which are the vectors from the
 !     Exact DMD, up to scaling by the inverse eigenvalues.   
 !     In both cases, the content of B can be lifted to the 
 !     original dimension of the input data by pre-multiplying
 !     with the Q factor from the initial QR factorization.   
 !     Here A denotes a compression of the underlying operator.      
 !     See the descriptions of F and X.
 !     If JOBF =='N', then B is not referenced.
 !.....
 !     LDB (input) INTEGER, LDB >= MIN(M,N)
 !     The leading dimension of the array B.
 !.....
 !     V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
 !     On exit, V(1:K,1:K) V contains the K eigenvectors of
 !     the Rayleigh quotient. The Ritz vectors
 !     (returned in Z) are the product of Q from the initial QR
 !     factorization (see the description of F) X (see the 
 !     description of X) and V.
 !.....
 !     LDV (input) INTEGER, LDV >= N-1
 !     The leading dimension of the array V.
 !.....      
 !     S (output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array
 !     The array S(1:K,1:K) is used for the matrix Rayleigh
 !     quotient. This content is overwritten during
 !     the eigenvalue decomposition by ZGEEV.
 !     See the description of K.
 !.....
 !     LDS (input) INTEGER, LDS >= N-1        
 !     The leading dimension of the array S.
 !.....
 !     ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array
 !     On exit, 
 !     ZWORK(1:MIN(M,N)) contains the scalar factors of the 
 !     elementary reflectors as returned by ZGEQRF of the 
 !     M-by-N input matrix F.   
 !     If the call to ZGEDMDQ is only workspace query, then
 !     ZWORK(1) contains the minimal complex workspace length and
 !     ZWORK(2) is the optimal complex workspace length. 
 !     Hence, the length of work is at least 2.
 !     See the description of LZWORK.      
 !.....      
 !     LZWORK (input) INTEGER
 !     The minimal length of the  workspace vector ZWORK.
 !     LZWORK is calculated as follows:
 !     Let MLWQR  = N (minimal workspace for ZGEQRF[M,N])
 !         MLWDMD = minimal workspace for ZGEDMD (see the
 !                  description of LWORK in ZGEDMD)
 !         MLWMQR = N (minimal workspace for 
 !                    ZUNMQR['L','N',M,N,N])
 !         MLWGQR = N (minimal workspace for ZUNGQR[M,N,N])
 !         MINMN  = MIN(M,N)      
 !     Then
 !     LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD)
 !     is further updated as follows:
 !        if   JOBZ == 'V' or JOBZ == 'F' THEN 
 !             LZWORK = MAX(LZWORK, MINMN+MLWMQR)
 !        if   JOBQ == 'Q' THEN
 !             LZWORK = MAX(ZLWORK, MINMN+MLWGQR)      
 !
 !.....      
 !     WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array
 !     On exit,
 !     WORK(1:N-1) contains the singular values of 
 !     the input submatrix F(1:M,1:N-1).
 !     If the call to ZGEDMDQ is only workspace query, then
 !     WORK(1) contains the minimal workspace length and
 !     WORK(2) is the optimal workspace length. hence, the
 !     length of work is at least 2.
 !     See the description of LWORK.
 !.....
 !     LWORK (input) INTEGER
 !     The minimal length of the  workspace vector WORK.
 !     LWORK is the same as in ZGEDMD, because in ZGEDMDQ
 !     only ZGEDMD requires real workspace for snapshots
 !     of dimensions MIN(M,N)-by-(N-1). 
 !     If on entry LWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace length for WORK.          
 !.....
 !     IWORK (workspace/output) INTEGER LIWORK-by-1 array
 !     Workspace that is required only if WHTSVD equals
 !     2 , 3 or 4. (See the description of WHTSVD).
 !     If on entry LWORK =-1 or LIWORK=-1, then the
 !     minimal length of IWORK is computed and returned in
 !     IWORK(1). See the description of LIWORK.
 !.....
 !     LIWORK (input) INTEGER
 !     The minimal length of the workspace vector IWORK.
 !     If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1
 !     Let M1=MIN(M,N), N1=N-1. Then
 !     If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M1,N1))
 !     If WHTSVD == 3, then LIWORK >= MAX(1,M1+N1-1)
 !     If WHTSVD == 4, then LIWORK >= MAX(3,M1+3*N1)
 !     If on entry LIWORK = -1, then a workspace query is
 !     assumed and the procedure only computes the minimal
 !     and the optimal workspace lengths for both WORK and
 !     IWORK. See the descriptions of WORK and IWORK.
 !..... 
 !     INFO (output) INTEGER
 !     -i < 0 :: On entry, the i-th argument had an
 !               illegal value
 !        = 0 :: Successful return.
 !        = 1 :: Void input. Quick exit (M=0 or N=0).
 !        = 2 :: The SVD computation of X did not converge.
 !               Suggestion: Check the input data and/or
 !               repeat with different WHTSVD.
 !        = 3 :: The computation of the eigenvalues did not
 !               converge.
 !        = 4 :: If data scaling was requested on input and
 !               the procedure found inconsistency in the data
 !               such that for some column index i,
 !               X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set
 !               to zero if JOBS=='C'. The computation proceeds
 !               with original or modified data and warning
 !               flag is set with INFO=4.  
 !.............................................................
 !.............................................................
 !     Parameters
 !     ~~~~~~~~~~      
      REAL(KIND=WP), PARAMETER ::  ONE = 1.0_WP
      REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP
 !     COMPLEX(KIND=WP), PARAMETER ::  ZONE = ( 1.0_WP, 0.0_WP )
      COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP )
 !      
 !     Local scalars      
 !     ~~~~~~~~~~~~~
      INTEGER           :: IMINWR, INFO1,  MINMN, MLRWRK,   &
                           MLWDMD, MLWGQR, MLWMQR, MLWORK,  & 
                           MLWQR,  OLWDMD, OLWGQR, OLWMQR,  &
                           OLWORK, OLWQR
      LOGICAL           :: LQUERY, SCCOLX, SCCOLY, WANTQ,  &
                           WNTTRF, WNTRES, WNTVEC, WNTVCF, &
                           WNTVCQ, WNTREF, WNTEX
      CHARACTER(LEN=1)  :: JOBVL
 !      
 !     External functions (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~
      LOGICAL       LSAME
      EXTERNAL      LSAME 
 !
 !     External subroutines (BLAS and LAPACK)
 !     ~~~~~~~~~~~~~~~~~~~~
      EXTERNAL      ZGEQRF, ZLACPY, ZLASET, ZUNGQR, & 
                    ZUNMQR, XERBLA

 !     External subroutines
 !     ~~~~~~~~~~~~~~~~~~~~
      EXTERNAL      ZGEDMD 
      
 !     Intrinsic functions
 !     ~~~~~~~~~~~~~~~~~~~
      INTRINSIC      MAX, MIN, INT         
 !..........................................................  
 !
 !    Test the input arguments    
      WNTRES = LSAME(JOBR,'R')
      SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' )
      SCCOLY = LSAME(JOBS,'Y')
      WNTVEC = LSAME(JOBZ,'V')
      WNTVCF = LSAME(JOBZ,'F')
      WNTVCQ = LSAME(JOBZ,'Q') 
      WNTREF = LSAME(JOBF,'R') 
      WNTEX  = LSAME(JOBF,'E')
      WANTQ  = LSAME(JOBQ,'Q')
      WNTTRF = LSAME(JOBT,'R')     
      MINMN  = MIN(M,N)
      INFO = 0 
      LQUERY = ( (LZWORK == -1) .OR. (LWORK == -1) .OR. (LIWORK == -1) )
 !       
      IF ( .NOT. (SCCOLX .OR. SCCOLY .OR.                &
                                  LSAME(JOBS,'N')) )  THEN 
          INFO = -1
      ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ    &
                              .OR. LSAME(JOBZ,'N')) ) THEN
          INFO = -2
      ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR.    & 
          ( WNTRES .AND. LSAME(JOBZ,'N') ) ) THEN
          INFO = -3
      ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN
           INFO = -4                 
      ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) )  THEN
          INFO = -5
       ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR.             & 
                LSAME(JOBF,'N') ) )                     THEN
          INFO = -6    
      ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR.   &
                       (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN
          INFO = -7
      ELSE IF ( M < 0 ) THEN
          INFO = -8
      ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN
          INFO = -9
      ELSE IF ( LDF < M ) THEN
          INFO = -11
      ELSE IF ( LDX < MINMN ) THEN
          INFO = -13
      ELSE IF ( LDY < MINMN ) THEN
          INFO = -15
      ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR.    & 
                       ((NRNK >= 1).AND.(NRNK <=N ))) )  THEN
          INFO = -16
      ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN
          INFO = -17
      ELSE IF ( LDZ < M ) THEN
          INFO = -21
      ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN
          INFO = -24
      ELSE IF ( LDV < N-1 ) THEN
          INFO = -26
      ELSE IF ( LDS < N-1 ) THEN
          INFO = -28
      END IF
 !      
      IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN
          JOBVL = 'V'
      ELSE
          JOBVL = 'N'
      END IF     
      IF ( INFO == 0 ) THEN  
          ! Compute the minimal and the optimal workspace
          ! requirements. Simulate running the code and 
          ! determine minimal and optimal sizes of the 
          ! workspace at any moment of the run.         
         IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN
             ! All output except K is void. INFO=1 signals
             ! the void input. In case of a workspace query,
             ! the minimal workspace lengths are returned.
            IF ( LQUERY ) THEN  
               IWORK(1) = 1
               ZWORK(1) = 2
               ZWORK(2) = 2
               WORK(1)  = 2
               WORK(2)  = 2
            ELSE                
               K = 0
            END IF             
            INFO = 1  
            RETURN
         END IF   
         
         MLRWRK = 2
         MLWORK = 2
         OLWORK = 2 
         IMINWR = 1
         MLWQR  = MAX(1,N)  ! Minimal workspace length for ZGEQRF.
         MLWORK = MAX(MLWORK,MINMN + MLWQR) 
         
         IF ( LQUERY ) THEN 
             CALL ZGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, &
                          INFO1 )
             OLWQR  = INT(ZWORK(1))
             OLWORK = MAX(OLWORK,MINMN + OLWQR)         
         END IF
         CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN,& 
                      N-1, X, LDX, Y, LDY, NRNK, TOL, K,     & 
                      EIGS, Z, LDZ, RES,  B, LDB, V, LDV,    & 
                      S, LDS, ZWORK, -1, WORK, -1, IWORK,&
                      -1, INFO1 )
         MLWDMD = INT(ZWORK(1))
         MLWORK = MAX(MLWORK, MINMN + MLWDMD)
         MLRWRK = MAX(MLRWRK, INT(WORK(1)))
         IMINWR = MAX(IMINWR, IWORK(1))
         IF ( LQUERY ) THEN 
             OLWDMD = INT(ZWORK(2))
             OLWORK = MAX(OLWORK, MINMN+OLWDMD)
         END IF
         IF ( WNTVEC .OR. WNTVCF ) THEN
            MLWMQR = MAX(1,N) 
            MLWORK = MAX(MLWORK,MINMN+MLWMQR)
            IF ( LQUERY ) THEN
               CALL ZUNMQR( 'L','N', M, N, MINMN, F, LDF,  & 
                            ZWORK, Z, LDZ, ZWORK, -1, INFO1 )
               OLWMQR = INT(ZWORK(1))
               OLWORK = MAX(OLWORK,MINMN+OLWMQR)
            END IF
         END IF  
         IF ( WANTQ ) THEN
            MLWGQR = MAX(1,N)
            MLWORK = MAX(MLWORK,MINMN+MLWGQR)
            IF ( LQUERY ) THEN 
                CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, &
                             ZWORK, -1, INFO1 )               
                OLWGQR = INT(ZWORK(1))
                OLWORK = MAX(OLWORK,MINMN+OLWGQR)
            END IF            
         END IF         
         IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34
         IF ( LWORK  < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -32
         IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -30
      END IF  
      IF( INFO /= 0 ) THEN
         CALL XERBLA( 'ZGEDMDQ', -INFO )
         RETURN
      ELSE IF ( LQUERY ) THEN
 !     Return minimal and optimal workspace sizes
          IWORK(1) = IMINWR
          ZWORK(1) = MLWORK
          ZWORK(2) = OLWORK
          WORK(1)  = MLRWRK
          WORK(2)  = MLRWRK
          RETURN
      END IF   
 !.....	  
 !     Initial QR factorization that is used to represent the
 !     snapshots as elements of lower dimensional subspace.
 !     For large scale computation with M >> N, at this place 
 !     one can use an out of core QRF.
 !   
      CALL ZGEQRF( M, N, F, LDF, ZWORK,                & 
                   ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
 !      
 !     Define X and Y as the snapshots representations in the
 !     orthogonal basis computed in the QR factorization.
 !     X corresponds to the leading N-1 and Y to the trailing
 !     N-1 snapshots.
      CALL ZLASET( 'L', MINMN, N-1, ZZERO,  ZZERO, X, LDX )
      CALL ZLACPY( 'U', MINMN, N-1, F,      LDF, X, LDX )
      CALL ZLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY )
      IF ( M >= 3 ) THEN
          CALL ZLASET( 'L', MINMN-2, N-2, ZZERO,  ZZERO, &
                       Y(3,1), LDY )  
      END IF
 !
 !     Compute the DMD of the projected snapshot pairs (X,Y)   
      CALL ZGEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, MINMN, &
                  N-1,  X, LDX, Y, LDY, NRNK,   TOL, K,    &
                  EIGS, Z, LDZ, RES, B,  LDB,   V, LDV,    &
                  S, LDS, ZWORK(MINMN+1), LZWORK-MINMN, & 
                  WORK,   LWORK, IWORK, LIWORK, INFO1 )
      IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN
          ! Return with error code. See ZGEDMD for details.
          INFO = INFO1
          RETURN
      ELSE
          INFO = INFO1
      END IF    
 !      
 !     The Ritz vectors (Koopman modes) can be explicitly 
 !     formed or returned in factored form.
      IF ( WNTVEC ) THEN
        ! Compute the eigenvectors explicitly.  
        IF ( M > MINMN ) CALL ZLASET( 'A', M-MINMN, K, ZZERO, &
                                     ZZERO, Z(MINMN+1,1), LDZ )
        CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z,  &
             LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
      ELSE IF ( WNTVCF ) THEN   
        !   Return the Ritz vectors (eigenvectors) in factored
        !   form Z*V, where Z contains orthonormal matrix (the
        !   product of Q from the initial QR factorization and 
        !   the SVD/POD_basis returned by ZGEDMD in X) and the 
        !   second factor (the eigenvectors of the Rayleigh 
        !   quotient) is in the array V, as returned by ZGEDMD.
        CALL ZLACPY( 'A', N, K, X, LDX, Z, LDZ )
        IF ( M > N ) CALL ZLASET( 'A', M-N, K, ZZERO, ZZERO, & 
                                 Z(N+1,1), LDZ )
        CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, &
                    LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )
      END IF
 !     
 !     Some optional output variables:
 !
 !     The upper triangular factor R in the initial QR 
 !     factorization is optionally returned in the array Y.
 !     This is useful if this call to ZGEDMDQ is to be 
 !     followed by a streaming DMD that is implemented in a 
 !     QR compressed form.
      IF ( WNTTRF ) THEN ! Return the upper triangular R in Y 
         CALL ZLASET( 'A', MINMN, N, ZZERO,  ZZERO, Y, LDY )
         CALL ZLACPY( 'U', MINMN, N, F, LDF,        Y, LDY )
      END IF    
 !
 !     The orthonormal/unitary factor Q in the initial QR 
 !     factorization is optionally returned in the array F. 
 !     Same as with the triangular factor above, this is 
 !     useful in a streaming DMD.
      IF ( WANTQ ) THEN                   ! Q overwrites F 
         CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK,     &
                      ZWORK(MINMN+1), LZWORK-MINMN, INFO1 )  
      END IF
 !      
      RETURN
 !      
      END SUBROUTINE ZGEDMDQ